{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recommender Systems 2020/21\n",
    "\n",
    "### Practice 3 - Content Based recommenders\n",
    "\n",
    "\n",
    "### Load the data you saw last time:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from urllib.request import urlretrieve\n",
    "import zipfile, os\n",
    "\n",
    "# If file exists, skip the download\n",
    "data_file_path = \"data/Movielens_10M/\"\n",
    "data_file_name = data_file_path + \"movielens_10m.zip\"\n",
    "\n",
    "# If directory does not exist, create\n",
    "if not os.path.exists(data_file_path):\n",
    "    os.makedirs(data_file_path)\n",
    "\n",
    "if not os.path.exists(data_file_name):\n",
    "    urlretrieve (\"http://files.grouplens.org/datasets/movielens/ml-10m.zip\", data_file_name)\n",
    "    \n",
    "dataFile = zipfile.ZipFile(data_file_name)\n",
    "URM_path = dataFile.extract(\"ml-10M100K/ratings.dat\", path=\"data/Movielens_10M\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ferra\\Anaconda3\\envs\\RecSysFramework\\lib\\site-packages\\ipykernel_launcher.py:6: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "URM_all_dataframe = pd.read_csv(filepath_or_buffer=URM_path, \n",
    "                                sep=\"::\", \n",
    "                                header=None, \n",
    "                                dtype={0:int, 1:int, 2:float, 3:int})\n",
    "\n",
    "URM_all_dataframe.columns = [\"UserID\", \"ItemID\", \"Interaction\", \"Timestamp\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>ItemID</th>\n",
       "      <th>Interaction</th>\n",
       "      <th>Timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>122</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838985046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>185</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>231</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>292</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>316</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>329</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>355</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838984474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>356</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983653</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>362</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838984885</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>364</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983707</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   UserID  ItemID  Interaction  Timestamp\n",
       "0       1     122          5.0  838985046\n",
       "1       1     185          5.0  838983525\n",
       "2       1     231          5.0  838983392\n",
       "3       1     292          5.0  838983421\n",
       "4       1     316          5.0  838983392\n",
       "5       1     329          5.0  838983392\n",
       "6       1     355          5.0  838984474\n",
       "7       1     356          5.0  838983653\n",
       "8       1     362          5.0  838984885\n",
       "9       1     364          5.0  838983707"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "URM_all_dataframe.head(n=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## And now load the content informations in the same way:\n",
    "## In this case we are using tags. The ICM also contains the user that added the tag in column 0 (but we don't use that information)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ferra\\Anaconda3\\envs\\RecSysFramework\\lib\\site-packages\\ipykernel_launcher.py:6: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "ICM_path = dataFile.extract(\"ml-10M100K/tags.dat\", path = \"data/Movielens_10M\")\n",
    "\n",
    "ICM_dataframe = pd.read_csv(filepath_or_buffer=ICM_path, \n",
    "                            sep=\"::\", \n",
    "                            header=None, \n",
    "                            dtype={0:int, 1:int, 2:str, 3:int})\n",
    "\n",
    "ICM_dataframe.columns = [\"UserID\", \"ItemID\", \"FeatureID\", \"Timestamp\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>ItemID</th>\n",
       "      <th>FeatureID</th>\n",
       "      <th>Timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15</td>\n",
       "      <td>4973</td>\n",
       "      <td>excellent!</td>\n",
       "      <td>1215184630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>1747</td>\n",
       "      <td>politics</td>\n",
       "      <td>1188263867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20</td>\n",
       "      <td>1747</td>\n",
       "      <td>satire</td>\n",
       "      <td>1188263867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20</td>\n",
       "      <td>2424</td>\n",
       "      <td>chick flick 212</td>\n",
       "      <td>1188263835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20</td>\n",
       "      <td>2424</td>\n",
       "      <td>hanks</td>\n",
       "      <td>1188263835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>20</td>\n",
       "      <td>2424</td>\n",
       "      <td>ryan</td>\n",
       "      <td>1188263835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>20</td>\n",
       "      <td>2947</td>\n",
       "      <td>action</td>\n",
       "      <td>1188263755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>20</td>\n",
       "      <td>2947</td>\n",
       "      <td>bond</td>\n",
       "      <td>1188263756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>20</td>\n",
       "      <td>3033</td>\n",
       "      <td>spoof</td>\n",
       "      <td>1188263880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>20</td>\n",
       "      <td>3033</td>\n",
       "      <td>star wars</td>\n",
       "      <td>1188263880</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   UserID  ItemID        FeatureID   Timestamp\n",
       "0      15    4973       excellent!  1215184630\n",
       "1      20    1747         politics  1188263867\n",
       "2      20    1747           satire  1188263867\n",
       "3      20    2424  chick flick 212  1188263835\n",
       "4      20    2424            hanks  1188263835\n",
       "5      20    2424             ryan  1188263835\n",
       "6      20    2947           action  1188263755\n",
       "7      20    2947             bond  1188263756\n",
       "8      20    3033            spoof  1188263880\n",
       "9      20    3033        star wars  1188263880"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ICM_dataframe.head(n=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### We can see that most users and items have no data associated to them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of tags\t 16529, Number of item-tag tuples 95580\n"
     ]
    }
   ],
   "source": [
    "n_features = len(ICM_dataframe[\"FeatureID\"].unique())\n",
    "\n",
    "print (\"Number of tags\\t {}, Number of item-tag tuples {}\".format(n_features, len(ICM_dataframe)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## We now build the sparse URM and ICM matrices\n",
    "\n",
    "### The tags are strings, we should traslate them into numbers so we can use them as indices in the ICM.\n",
    "### We should also ensure that the item and user indices we use in ICM and URM are consistent. To do so we use the same mapper, first we populate it with the URM and then we add the new ids that appear only in the ICM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique user_id in the URM are 69878\n",
      "Unique user_id in the URM and ICM are 71567\n"
     ]
    }
   ],
   "source": [
    "user_original_ID_to_index_dict = {}\n",
    "\n",
    "for user_id in URM_all_dataframe[\"UserID\"].unique():\n",
    "    user_original_ID_to_index_dict[user_id] = len(user_original_ID_to_index_dict)  \n",
    "\n",
    "print(\"Unique user_id in the URM are {}\".format(len(user_original_ID_to_index_dict)))\n",
    "    \n",
    "for user_id in ICM_dataframe[\"UserID\"].unique():\n",
    "    if user_id not in user_original_ID_to_index_dict:\n",
    "        user_original_ID_to_index_dict[user_id] = len(user_original_ID_to_index_dict)\n",
    "        \n",
    "print(\"Unique user_id in the URM and ICM are {}\".format(len(user_original_ID_to_index_dict)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique item_id in the URM are 10677\n",
      "Unique item_id in the URM and ICM are 10681\n"
     ]
    }
   ],
   "source": [
    "item_original_ID_to_index_dict = {}\n",
    "\n",
    "for item_id in URM_all_dataframe[\"ItemID\"].unique():\n",
    "    item_original_ID_to_index_dict[item_id] = len(item_original_ID_to_index_dict)\n",
    "\n",
    "print(\"Unique item_id in the URM are {}\".format(len(item_original_ID_to_index_dict)))\n",
    "    \n",
    "for item_id in ICM_dataframe[\"ItemID\"].unique():\n",
    "    if item_id not in item_original_ID_to_index_dict:\n",
    "        item_original_ID_to_index_dict[item_id] = len(item_original_ID_to_index_dict)\n",
    "        \n",
    "print(\"Unique item_id in the URM and ICM are {}\".format(len(item_original_ID_to_index_dict)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique feature_id in the URM are 16529\n"
     ]
    }
   ],
   "source": [
    "feature_original_ID_to_index_dict = {}\n",
    "\n",
    "for feature_id in ICM_dataframe[\"FeatureID\"].unique():\n",
    "    feature_original_ID_to_index_dict[feature_id] = len(feature_original_ID_to_index_dict)\n",
    "\n",
    "print(\"Unique feature_id in the URM are {}\".format(len(feature_original_ID_to_index_dict)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New index for feature 'star wars' is 9\n"
     ]
    }
   ],
   "source": [
    "original_feature_ID = \"star wars\"\n",
    "print(\"New index for feature '{}' is {}\".format(original_feature_ID, feature_original_ID_to_index_dict[original_feature_ID]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# We can now build the URM and ICM using the new indices\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "URM_all_dataframe[\"UserID\"] = [user_original_ID_to_index_dict[user_original] for user_original in\n",
    "                                      URM_all_dataframe[\"UserID\"].values]\n",
    "\n",
    "URM_all_dataframe[\"ItemID\"] = [item_original_ID_to_index_dict[item_original] for item_original in \n",
    "                                      URM_all_dataframe[\"ItemID\"].values]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>ItemID</th>\n",
       "      <th>Interaction</th>\n",
       "      <th>Timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838985046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838984474</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983653</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838984885</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>5.0</td>\n",
       "      <td>838983707</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   UserID  ItemID  Interaction  Timestamp\n",
       "0       0       0          5.0  838985046\n",
       "1       0       1          5.0  838983525\n",
       "2       0       2          5.0  838983392\n",
       "3       0       3          5.0  838983421\n",
       "4       0       4          5.0  838983392\n",
       "5       0       5          5.0  838983392\n",
       "6       0       6          5.0  838984474\n",
       "7       0       7          5.0  838983653\n",
       "8       0       8          5.0  838984885\n",
       "9       0       9          5.0  838983707"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "URM_all_dataframe.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "ICM_dataframe[\"UserID\"] = [user_original_ID_to_index_dict[user_original] for user_original in\n",
    "                                      ICM_dataframe[\"UserID\"].values]\n",
    "\n",
    "ICM_dataframe[\"ItemID\"] = [item_original_ID_to_index_dict[item_original] for item_original in \n",
    "                                      ICM_dataframe[\"ItemID\"].values]\n",
    "\n",
    "ICM_dataframe[\"FeatureID\"] = [feature_original_ID_to_index_dict[feature_original] for feature_original in \n",
    "                                      ICM_dataframe[\"FeatureID\"].values]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>ItemID</th>\n",
       "      <th>FeatureID</th>\n",
       "      <th>Timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>69878</td>\n",
       "      <td>1926</td>\n",
       "      <td>0</td>\n",
       "      <td>1215184630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>69879</td>\n",
       "      <td>1040</td>\n",
       "      <td>1</td>\n",
       "      <td>1188263867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>69879</td>\n",
       "      <td>1040</td>\n",
       "      <td>2</td>\n",
       "      <td>1188263867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>69879</td>\n",
       "      <td>1086</td>\n",
       "      <td>3</td>\n",
       "      <td>1188263835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>69879</td>\n",
       "      <td>1086</td>\n",
       "      <td>4</td>\n",
       "      <td>1188263835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>69879</td>\n",
       "      <td>1086</td>\n",
       "      <td>5</td>\n",
       "      <td>1188263835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>69879</td>\n",
       "      <td>612</td>\n",
       "      <td>6</td>\n",
       "      <td>1188263755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>69879</td>\n",
       "      <td>612</td>\n",
       "      <td>7</td>\n",
       "      <td>1188263756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>69879</td>\n",
       "      <td>628</td>\n",
       "      <td>8</td>\n",
       "      <td>1188263880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>69879</td>\n",
       "      <td>628</td>\n",
       "      <td>9</td>\n",
       "      <td>1188263880</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   UserID  ItemID  FeatureID   Timestamp\n",
       "0   69878    1926          0  1215184630\n",
       "1   69879    1040          1  1188263867\n",
       "2   69879    1040          2  1188263867\n",
       "3   69879    1086          3  1188263835\n",
       "4   69879    1086          4  1188263835\n",
       "5   69879    1086          5  1188263835\n",
       "6   69879     612          6  1188263755\n",
       "7   69879     612          7  1188263756\n",
       "8   69879     628          8  1188263880\n",
       "9   69879     628          9  1188263880"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ICM_dataframe.head(n=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### When we create the matrices we need to make sure they have the same shape. As we have seen some items and users only appear in one of the two matrices and are missing in the other (if they have no tags or no interactions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.sparse as sps\n",
    "import numpy as np\n",
    "\n",
    "n_users = len(user_original_ID_to_index_dict)\n",
    "n_items = len(item_original_ID_to_index_dict)\n",
    "n_features = len(feature_original_ID_to_index_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<71567x10681 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 10000054 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "URM_all = sps.csr_matrix((URM_all_dataframe[\"Interaction\"].values, \n",
    "                          (URM_all_dataframe[\"UserID\"].values, URM_all_dataframe[\"ItemID\"].values)),\n",
    "                        shape = (n_users, n_items))\n",
    "\n",
    "URM_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<10681x16529 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 71155 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ICM_all = sps.csr_matrix((np.ones(len(ICM_dataframe[\"ItemID\"].values)), \n",
    "                          (ICM_dataframe[\"ItemID\"].values, ICM_dataframe[\"FeatureID\"].values)),\n",
    "                        shape = (n_items, n_features))\n",
    "\n",
    "ICM_all.data = np.ones_like(ICM_all.data)\n",
    "\n",
    "ICM_all"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Let's take a look at the ICM\n",
    "\n",
    "### We leverage CSR and CSC indptr data structure to compute the number of cells that have values for that row or column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "ICM_all = sps.csr_matrix(ICM_all)\n",
    "features_per_item = np.ediff1d(ICM_all.indptr)\n",
    "\n",
    "ICM_all = sps.csc_matrix(ICM_all)\n",
    "items_per_feature = np.ediff1d(ICM_all.indptr)\n",
    "\n",
    "ICM_all = sps.csr_matrix(ICM_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10681,)\n",
      "(16529,)\n"
     ]
    }
   ],
   "source": [
    "print(features_per_item.shape)\n",
    "print(items_per_feature.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "features_per_item = np.sort(features_per_item)\n",
    "items_per_feature = np.sort(items_per_feature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAbCElEQVR4nO3df5RcZZ3n8fcnCQmEXyHSsCEJdNAcdhKcBexlQD07rMGBQSSOI2MwshHZzZmFAX/MHk0mI86uJ+cw6zijszOofQCJ0MMP0ZXIooIR8Mwugo2AJIEAY0wIiaRVQIZgyI/v/nGfrhRtdaeqcm/dqurP65w+Vfe5de/zffKjvv08z73PVURgZmYGMKHsAMzMrH04KZiZWYWTgpmZVTgpmJlZhZOCmZlVTCo7gANx9NFHR29vb9lhmJl1lIcffvgXEdFTa19HJ4Xe3l4GBwfLDsPMrKNI2jTaPg8fmZlZhZOCmZlVOCmYmVmFk4KZmVU4KZiZWYWTgplZJxkYgN5emDAhex0YyPX0HX1JqpnZuDIwAEuXwo4d2famTdk2wOLFuVThnoKZWadYsWJfQhi2Y0dWnhMnBTOzTrF5c2PlTXBSMDPrFMcf31h5E5wUzMw6xcqVMHXq68umTs3Kc+KkYGbWKRYvhv5+OOEEkLLX/v7cJpnBVx+ZmXWWxYtzTQIjFdZTkHS9pO2S1tbY998khaSjq8qWS3pG0gZJ5xQVl5mZja7I4aMbgHNHFkqaDbwT2FxVNg9YBMxPx1wjaWKBsZmZWQ2FJYWI+AHwqxq7/g74BBBVZQuBWyJiZ0RsBJ4BTi8qNjMzq62lE82SLgCei4jHRuyaCTxbtb0lldU6x1JJg5IGh4aGCorUzGx8allSkDQVWAFcVWt3jbKoUUZE9EdEX0T09fTUfJqcmZk1qZVXH70RmAM8JglgFvBjSaeT9QxmV312FrC1hbGZmRkt7ClExOMRcUxE9EZEL1kiOC0ifg6sBhZJmiJpDjAXeKhVsZmZWabIS1JvBh4ATpK0RdKlo302ItYBtwHrge8Al0fEnqJiMzOz2gobPoqIi/azv3fE9kogv3u1zcysYV7mwszMKpwUzMyswknBzMwqnBTMzKzCScHMzCqcFMzMrMJJwczMKpwUzMyswknBzKyTXHYZTJqUPY5z0qRsO0d+HKeZWae47DL44hf3be/Zs2/7mmtyqcI9BTOzTtHf31h5E5wUzMw6xZ5R1gkdrbwJTgpmZp1i4iiPrh+tvAlOCmZmnWLp0sbKm+CJZjOzTjE8mdzfnw0ZTZyYJYScJpnBScHMrLNcc02uSWAkDx+ZmXWSgQHo7YUJE7LXgYFcT++egplZpxgYyIaLduzItjdt2jefsHhxLlUU+Yzm6yVtl7S2quyzkp6U9BNJ/1vStKp9yyU9I2mDpHOKisvMrGOtWLEvIQzbsSMrz0mRw0c3AOeOKLsHODkifhd4ClgOIGkesAiYn465RlJ+11iZmXWDzZsbK29CYUkhIn4A/GpE2d0RsTtt/hCYld4vBG6JiJ0RsRF4Bji9qNjMzDrS8cc3Vt6EMieaPwx8O72fCTxbtW9LKvstkpZKGpQ0ODQ0VHCIZmZtZOVKmDz59WWTJ2flOSklKUhaAewGhqfNVeNjUevYiOiPiL6I6Ovp6SkqRDOz9hQx9vYBanlSkLQEOB9YHFFpzRZgdtXHZgFbWx2bmVlbW7ECdu16fdmuXR0z0fxbJJ0LfBK4ICKqp9BXA4skTZE0B5gLPNTK2MzM2l4nTzRLuhl4ADhJ0hZJlwL/ABwO3CPpUUlfAoiIdcBtwHrgO8DlEZHfsn9mZt2gBRPNhd28FhEX1Si+bozPrwTymy0xM+s25533+ofsVJfnxMtcmJl1irvuaqy8CU4KZmadopPnFMzMLGddfvOamZk1YrS5A88pmJmNQ55TMDOzCs8pmJlZhecUzMyswnMKZmZW4TkFMzOr2LSpsfImOCmYmXWKCaN8ZY9W3kwVuZ3JzMyKtXdvY+VNcFIwM7MKJwUzM6twUjAzswonBTMzq3BSMDOzCicFM7NOceihjZU3ochnNF8vabuktVVl0yXdI+np9HpU1b7lkp6RtEHSOUXFZWbWsb785d++J2HChKw8J0X2FG4Azh1RtgxYExFzgTVpG0nzgEXA/HTMNZImFhibmVnnWbwYvvpVOOEEkLLXr341K89JYUkhIn4A/GpE8UJgVXq/CnhPVfktEbEzIjYCzwCnFxWbmZnVNqnF9R0bEdsAImKbpGNS+Uzgh1Wf25LKzMxs2Nlnw5o1+7Y3bYKlS7P3OfUW2mWiWTXKouYHpaWSBiUNDg0NFRyWmVmbuOyy1yeEYTt2wIoVuVXT6qTwvKQZAOl1eyrfAsyu+twsYGutE0REf0T0RURfT09PocGambWN/v7R93Xwk9dWA0vS+yXAHVXliyRNkTQHmAs81OLYzMza1549o+/L8clrhc0pSLoZOAs4WtIW4NPA1cBtki4FNgMXAkTEOkm3AeuB3cDlETHGn4CZmVWsXJnbqRRRc+i+I/T19cXg4GDZYZiZFU+1pl6TBr/HJT0cEX219u13+EjSGyVNSe/PknSlpGkNRWBmZh2hnjmFrwN7JL0JuA6YA/xToVGZmVkp6kkKeyNiN/BHwOcj4mPAjGLDMjOzMtSTFHZJuojsaqE7U9lBxYVkZmZlqScpXAKcCayMiI3pktGbig3LzMzKsN9LUiNivaRPAsen7Y1kl5aamVmXqefqo3cDjwLfSdunSFpddGBmZlZl8uTGyptUz/DRX5GtWPoiQEQ8SnYFkpmZtcpo9ymMdf9CE+pJCrsj4qURZZ17x5uZWSfaubOx8ibVs8zFWkkfACZKmgtcCfy/XKMwM7O2UE9P4QqyJ6LtJLtp7SXgo0UGZWZm5Rizp5Aeibk6Is4G8luw28zMGiPVXuOolXMKaaXSHZKOzLVWMzNrzGiL3uW8qGk9cwq/AR6XdA/wyr444spcIzEzs9LVkxT+T/oxM7MuV88dzataEYiZmY1h8mR47bXa5Tnab1KQtJEa9yVExIm5RmJmZqOrlRDGKm9SPcNH1U/nOZjsEZrTc43CzMzGNmEC7N1buzzPavb3gYj4ZdXPcxHxeeAdB1KppI9JWidpraSbJR0sabqkeyQ9nV6POpA6zMy6Sq2EMFZ5k+pZEO+0qp8+SX8KHN5shZJmkt0V3RcRJwMTgUXAMmBNRMwF1qRtMzNroXqGjz5X9X43sBH4kxzqPUTSLmAqsBVYDpyV9q8C7gM+eYD1mJlZA+pJCpdGxE+rC9KDdpoSEc9J+htgM/AqcHdE3C3p2IjYlj6zTdIxzdZhZtZ1pkypvfjdlCm5VlPPDMXtdZbVJc0VLCRbfvs44FBJH2zg+KWSBiUNDg0NNRuGmVlnKXuVVEn/lmwhvCMlvbdq1xFkVyE162xgY0QMpXq+AbwVeF7SjNRLmAFsr3VwRPQD/QB9fX1ewtvMLEdjDR+dBJwPTAPeXVX+MvBfDqDOzcAZkqaSDR8tAAbJltBYQvaozyXAHQdQh5mZNWHUpBARdwB3SDozIh7Iq8KIeFDS7cCPySauHyH7zf8w4DZJl5IljgvzqtPMzOpTz0TzI5IuJxtKqgwbRcSHm600Ij4NfHpE8U6yXoOZmVUbGGhZVfVMNN8I/BvgHOB+YBbZEJKZmbXCitY9zqaepPCmiPgU8EpaHO9dwJuLDcvMzCo2bWpZVfUkhV3p9UVJJwNHAr2FRWRmZq831vpGOT95rZ45hf50b8GngNVkE8JX5RqFmZmNbqz1jW68Mdeq6nmewrXp7f2Al8s2M2snixfnerp6FsQ7VtJ1kr6dtuely0bNzKzL1DOncAPwXbIlKQCeAj5aVEBmZlaeepLC0RFxG7AXICJ2A3sKjcrMzEpRT1J4RdIbSI/klHQG8FKhUZmZWSnqufro42RXHb1R0v8FeoD3FRqVmZmVYqxVUi+MiK8BLwC/T7ZAnoANEbFrtOPMzKxzjTV8tDy9fj0idkfEuohY64RgZta9xho++qWke4E5klaP3BkRFxQXlpmZlWGspPAu4DSyBfE+N8bnzMysS4z1PIXXgB9KeuvwU9LMzKy77feSVCcEM7Pxo577FMzMrCwzZ7a0OicFM7N2tnVrS6vb781rkuYAV5A9Q6HyeV99ZGbWfeq5o/mbwHXAt0jrHx0oSdOAa4GTyZbP+DCwAbiVLPn8DPiTiHghj/rMzLrSCSfkfsp6ksJvIuLvc673C8B3IuJ9kiYDU4G/ANZExNWSlgHLgE/mXK+ZWfdYuTL3Uyoixv6A9AFgLnA3sHO4PCJ+3FSF0hHAY8CJUVW5pA3AWRGxTdIM4L6IOGmsc/X19cXg4GAzYZiZdYaxHre5n+/v0U+phyOir9a+enoKbwYuBt7BvuGjSNvNOBEYAr4i6d8BDwMfAY6NiG0AKTEcU+tgSUuBpQDHH398kyGYmVkt9SSFPyL7rf61HOs8DbgiIh6U9AWyoaK6REQ/0A9ZTyGnmMzMjPouSX0MmJZjnVuALRHxYNq+nSxJPJ+GjUiv23Os08ys8wwMtLzKenoKxwJPSvoRr59TaOqS1Ij4uaRnJZ0UERuABcD69LMEuDq93tHM+c3MusbFF7e8ynqSwqcLqPcKYCBdefRT4BKyXsttki4FNgMXFlCvmVnnaHIi+UDsNylExP15VxoRjwK1Zr4X5F2XmZnVr547ml8mPZ8ZmAwcBLwSEUcUGZiZmY1hWp5TvfvU01M4vHpb0nuA0wuJxszM6vNCMQs+NLwgXkR8k+bvUTAzszZWz/DRe6s2J5DNBfj+ADOzIp19dinV1nP10bur3u8mW6xuYSHRmJlZZs2aUqqtZ07hklYEYmZm5Rs1KUi6aozjIiI+U0A8ZmZWorF6Cq/UKDsUuBR4A+CkYGbWZUZNChHxueH3kg4nW8n0EuAW4HOjHWdmZgdo/vyx9xd4p/OYcwqSpgMfBxYDq4DT/DQ0M7OCrV9fWtVjzSl8Fngv2TLVb46If21ZVGZmVoqxbl77c+A44C+BrZJ+nX5elvTr1oRnZmatNNacQsN3O5uZWcGOO67Q0/uL38ysnRx11Nj7n3uu0OqdFMzM2smLL5ZavZOCmZlVOCmYmVmFk4KZWbuQxt5f8CQzlJgUJE2U9IikO9P2dEn3SHo6ve5ntsXMbJwpeJIZyu0pfAR4omp7GbAmIuYCa9K2mZm1UClJQdIs4F3AtVXFC8mW0iC9vqfVcZmZlWZ/Q0ctUlZP4fPAJ4C9VWXHRsQ2gPR6TK0DJS2VNChpcGhoqPhIzczawUEHtaSalicFSecD2yPi4WaOj4j+iOiLiL6enp6cozMza1OvvdaSaup5HGfe3gZcIOk84GDgCEk3Ac9LmhER2yTNALaXEJuZWetNnVp2BBUt7ylExPKImBURvcAi4PsR8UFgNbAkfWwJcEerYzMzK8Wrr469f8GC1sRBe92ncDXwTklPA+9M22Zm9r3vtayqMoaPKiLiPuC+9P6XQOvSoZlZO2iTq46GtVNPwcxsfJk8ef+fadFVR8OcFMzMyrJr1/4/06KrjoY5KZiZtauIllfppGBmVoY2m0sY5qRgZtZqbZoQwEnBzKy16k0IJQwdgZOCmZlVcVIwM2uVensJLXiYzmicFMzMWqGReYQWPExnNE4KZmZFayQhlDSXMMxJwcysXZScEMBJwcysOPPnt/Xlp7WUuiCemVnXajQZtEEvAdxTMDPLX4cmBHBSMDMrVxslBPDwkZlZfjps/qAW9xTMzPLQTEJos14COCmYmR24LkkIUEJSkDRb0r2SnpC0TtJHUvl0SfdIejq9HtXq2MzMGtbMpHKbJgQop6ewG/jziPgd4AzgcknzgGXAmoiYC6xJ22Zm7UnqijmEkVqeFCJiW0T8OL1/GXgCmAksBFalj60C3tPq2MzM6tJsMmjjHsKwUucUJPUCpwIPAsdGxDbIEgdwzCjHLJU0KGlwaGioVaGamWWaSQiHHNIRCQFKTAqSDgO+Dnw0In5d73ER0R8RfRHR19PTU1yAZmbVmh0uioAdO/KPpyClJAVJB5ElhIGI+EYqfl7SjLR/BrC9jNjMzIB9SaBL5w5GU8bVRwKuA56IiL+t2rUaWJLeLwHuaHVsZmZAfkmgQ4aMqpVxR/PbgIuBxyU9msr+ArgauE3SpcBm4MISYjOz8SyPZDBhAuzZc+DnKUnLk0JE/DMw2p/8glbGYmbjXN7DQvPmwbp1+Z6zxbz2kZmNL0XND3RBQgAnBTMbT4pKCB04dzAar31kZt1rYKC4K4gmTGj7JSua4Z6CmXWHmTNh69bi6+myJDCSk4KZdZ6y7hvo8oQAHj4ys07TyoQwb96+IaJxkBDAPQUza2dl9AjGyZf/aNxTMLPynH32by8nUdbSEuOoNzAW9xTMrDidsmaQk0GFk4KZFaNdE4ITwJg8fGRmjZk/f+whn3ZaVbR6ktjDQ3VxT8FsvGuXL/ADNW0avPBC2VF0PPcUzMYzJwQbwUnBrNPt7wqedh/iaUT10hLVP04IufHwkVmrzZ8P69eXHUX78rh/qZwUbHzrxN+Wu4G/+NuWh49s/HJCKIcTQltzT8HyNXUqvPpq2VFYq/mLvmu0XVKQdC7wBWAicG1EXF1AJbmf0qzr+It+XGqr4SNJE4F/BP4QmAdcJGlezpXkejqzruSEMG61VVIATgeeiYifRsRrwC3AwpJjMus8tS7bbOTHxq12SwozgWertreksgpJSyUNShocGhpqaXBmhTnQL3F/qVtO2i0p1Brbed2/8Ijoj4i+iOjr6elpUVg2rixYkP+XtL/ErUO0W1LYAsyu2p4FtOChq2bJggXwve+VHYVZadrt6qMfAXMlzQGeAxYBH8i1hghPNreafxM26xhtlRQiYrekPwO+S3ZJ6vURsa6AinI/pZlZN2irpAAQEXcBd5Udh5nZeNRucwpmZlYiJwUzM6twUjAzswonBTMzq1B08JU4koaATQdwiqOBX+QUTrtyG7uD29gd2qWNJ0REzbt/OzopHChJgxHRV3YcRXIbu4Pb2B06oY0ePjIzswonBTMzqxjvSaG/7ABawG3sDm5jd2j7No7rOQUzM3u98d5TMDOzKk4KZmZWMS6TgqRzJW2Q9IykZWXH0whJsyXdK+kJSeskfSSVT5d0j6Sn0+tRVccsT23dIOmcqvK3SHo87ft7qX3WFJc0UdIjku5M213VPgBJ0yTdLunJ9Pd5Zje1U9LH0r/RtZJulnRwN7RP0vWStktaW1WWW7skTZF0ayp/UFJvK9tHRIyrH7Iluf8FOBGYDDwGzCs7rgbinwGclt4fDjwFzAP+J7AslS8D/jq9n5faOAWYk9o+Me17CDiT7Il33wb+sOz2VbXz48A/AXem7a5qX4pvFfCf0/vJwLRuaSfZY3Q3Aoek7duAD3VD+4D/AJwGrK0qy61dwGXAl9L7RcCtLW1f2f94SvgLPRP4btX2cmB52XEdQHvuAN4JbABmpLIZwIZa7SN7VsWZ6TNPVpVfBHy57PakWGYBa4B3VCWFrmlfiueI9KWpEeVd0U72PW99OtkS/XcCf9BF7esdkRRya9fwZ9L7SWR3QKuotoz8GY/DR8P/WIdtSWUdJ3UrTwUeBI6NiG0A6fWY9LHR2jszvR9Z3g4+D3wC2FtV1k3tg6ynOgR8JQ2TXSvpULqknRHxHPA3wGZgG/BSRNxNl7SvhjzbVTkmInYDLwFvKCzyEcZjUqg1Htlx1+VKOgz4OvDRiPj1WB+tURZjlJdK0vnA9oh4uN5DapS1bfuqTCIbgvhiRJwKvEI27DCajmpnGlNfSDZkchxwqKQPjnVIjbK2bV8DmmlXqW0ej0lhCzC7ansWsLWkWJoi6SCyhDAQEd9Ixc9LmpH2zwC2p/LR2rslvR9ZXra3ARdI+hlwC/AOSTfRPe0btgXYEhEPpu3byZJEt7TzbGBjRAxFxC7gG8Bb6Z72jZRnuyrHSJoEHAn8qrDIRxiPSeFHwFxJcyRNJpvIWV1yTHVLVyhcBzwREX9btWs1sCS9X0I21zBcvihd0TAHmAs8lLq4L0s6I53zP1UdU5qIWB4RsyKil+zv5vsR8UG6pH3DIuLnwLOSTkpFC4D1dE87NwNnSJqa4loAPEH3tG+kPNtVfa73kf0faF3vqOwJmzJ+gPPIrtr5F2BF2fE0GPvbybqSPwEeTT/nkY05rgGeTq/Tq45Zkdq6gaorN4A+YG3a9w+0cDKrzraexb6J5m5s3ynAYPq7/CZwVDe1E/jvwJMpthvJrsDp+PYBN5PNk+wi+63+0jzbBRwMfA14huwKpRNb2T4vc2FmZhXjcfjIzMxG4aRgZmYVTgpmZlbhpGBmZhVOCmZmVuGkYF1N0oq0UudPJD0q6fcaPP5Dko5r8Jje6hU0q8qPk3R7en+KpPMaOa9ZK0wqOwCzokg6EzifbFXZnZKOJluNtN7jJ5Kt7LmWHO6ijYitZDcjQXaPQh9w14Ge1yxP7ilYN5sB/CIidgJExC/SFzOSFqSF6B5P6+NPSeU/k3SVpH8mW7myDxhIvYxD0hr490t6WNJ3q5Y2eIukxyQ9AFxeK5jhHkS6k/5/AO9P532/pENTHD9KcS1Mx3xI0jclfUvSRkl/Junj6TM/lDQ9fe5KSetTj+iWQv9Uras5KVg3uxuYLekpSddI+n0ASQcDNwDvj4g3k/WY/2vVcb+JiLdHxE1kdxwvjohTgN3A/wLeFxFvAa4HVqZjvgJcGRFn7i+oiHgNuIpsnfxTIuJWsrtevx8R/x74j8Bn06qpACcDHwBOT/XtiGwRvQfIlkeAbDG9UyPid4E/beyPyWwfJwXrWhHxr8BbgKVky1TfKulDwElki7U9lT66iuzBKcNuHeWUJ5F9Qd8j6VHgL4FZko4EpkXE/elzNzYR7h8Ay9J57yNb6uD4tO/eiHg5IobIllH+Vip/nGxdf8iWyhhIK5HubqJ+M8BzCtblImIP2ZfsfZIeJ1to7NH9HPbKKOUC1o3sDUiaxoEvbSzgjyNiw4hz/x6ws6pob9X2Xvb9H34XWWK7APiUpPmRrcVv1hD3FKxrSTpJ0tyqolOATWSLtPVKelMqvxi4f+Txyctkjz2FbEGznjSBjaSD0pfvi8BLkt6ePre4jvCqzwvZ07auSCtmIunUOs5B+uwEYHZE3Ev2cKJpwGH1Hm9WzUnButlhwKrhCViy5+X+VUT8BrgE+FrqPewFvjTKOW4AvpSGdSaSXT3015IeI+txvDV97hLgH9NE86t1xHYvMG94ohn4DHAQ8JN0OetnGmjnROCm1JZHgL9LicqsYV4l1czMKtxTMDOzCicFMzOrcFIwM7MKJwUzM6twUjAzswonBTMzq3BSMDOziv8PQrLLve7VfpEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as pyplot\n",
    "%matplotlib inline  \n",
    "\n",
    "pyplot.plot(features_per_item, 'ro')\n",
    "pyplot.ylabel('Num features ')\n",
    "pyplot.xlabel('Sorted items')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAbP0lEQVR4nO3de5Ad5Xnn8e9PI3ERNyMYKKHbiES2V4ptjKe0ZnHsjXEBZm0LO2aRM2ZlUK02lhzjSrKOWKVib3ZVAZOkQi7Clg1kkowjFLAXxUtisGJwvCbAAOIiZIwMSMgSSJAQBHKEJT37R7/Tao3OzJw56j5zjub3qTrV3c/pyzNHp86jt9/utxURmJmZAUwY6wTMzKx1uCiYmVnORcHMzHIuCmZmlnNRMDOz3MSxTuBInH766dHV1TXWaZiZtZWHHnropYjorPVeWxeFrq4u+vv7xzoNM7O2ImnLUO/59JGZmeVcFMzMLOeiYGZmORcFMzPLuSiYmVnORcHMrJ309UFXF0yYkE37+krdfVtfkmpmNq709cGSJbBnT7a8ZUu2DNDTU8oh3FIwM2sXK1YcLAgD9uzJ4iVxUTAzaxdbt44u3gAXBTOzdjFz5ujiDXBRMDNrFytXwuTJh8YmT87iJXFRMDNrFz09sHo1zJoFUjZdvbq0Tmbw1UdmZu2lp6fUIjCYWwpmZpZzUTAzs5yLgpmZ5VwUzMwsV2lRkPQmSbdJ+qGkTZLOkzRF0t2Snk7TUwvrXyNps6SnJF1UZW5mZna4qlsKNwB/HxFvBd4BbAKWA+sjYg6wPi0jaS6wEJgHXAysktRRcX5mZlZQWVGQdDLwXuAmgIh4IyJeARYAvWm1XuDSNL8AWBMReyPiWWAzML+q/MzM7HBVthTOBnYBt0h6RNLXJJ0AnBkROwDS9Iy0/jTg+cL221LMzMyapMqiMBE4F7gxIt4JvE46VTQE1YjFYStJSyT1S+rftWtXOZmamRlQbVHYBmyLiPvT8m1kReJFSVMB0nRnYf0Zhe2nA9sH7zQiVkdEd0R0d3Z2Vpa8mdl4VFlRiIgXgOclvSWFLgCeBNYBi1JsEXBHml8HLJR0rKTZwBzggaryMzOzw1U99tGvAX2SjgGeAa4kK0RrJS0GtgKXAUTERklryQrHPmBZROyvOD8zMyuotChExAagu8ZbFwyx/kqgvDFgzcxsVHxHs5mZ5VwUzMws56JgZmY5FwUzM8u5KJiZWc5FwczMci4KZmaWc1EwM7Oci4KZmeVcFMzMLOeiYGZmORcFMzPLuSiYmVnORcHMzHIuCmZmlnNRMDOznIuCmZnlXBTMzCznomBmZjkXBTMzy7komJlZzkXBzMxylRYFSc9JelzSBkn9KTZF0t2Snk7TUwvrXyNps6SnJF1UZW5mZna4ZrQUfikizomI7rS8HFgfEXOA9WkZSXOBhcA84GJglaSOJuRnZmbJWJw+WgD0pvle4NJCfE1E7I2IZ4HNwPwxyM/MbNyquigEcJekhyQtSbEzI2IHQJqekeLTgOcL225LMTMzG9DXB11dMGFCNu3rK3X3E0vd2+HOj4jtks4A7pb0w2HWVY1YHLZSVlyWAMycObOcLM3M2kFfHyxZAnv2ZMtbtmTLAD09pRyi0pZCRGxP053AN8lOB70oaSpAmu5Mq28DZhQ2nw5sr7HP1RHRHRHdnZ2dVaZvZtZaVqw4WBAG7NmTxUtSWVGQdIKkkwbmgQuBJ4B1wKK02iLgjjS/Dlgo6VhJs4E5wANV5Wdm1na2bh1dvAFVnj46E/impIHjfD0i/l7Sg8BaSYuBrcBlABGxUdJa4ElgH7AsIvZXmJ+ZWXuZOTM7ZVQrXpLKikJEPAO8o0b8ZeCCIbZZCaysKiczs7a2cuWhfQoAkydn8ZL4jmYzs3bR0wOrV8OsWSBl09WrS+tkhuqvPjIzszL19JRaBAZzS8HMzHIuCmZmlnNRMDOznIuCmZnlXBTMzCznomBmZjkXBTMzy7komJlZzkXBzMxyLgpmZpZzUTAzs5yLgpmZ5VwUzMws56JgZmY5FwUzM8u5KJiZWc5FwczMci4KZmaWc1EwM7Oci4KZWTvp64OuLpgwIZv29ZW6+8qLgqQOSY9I+lZaniLpbklPp+mphXWvkbRZ0lOSLqo6NzOzttLXB1ddBVu2QEQ2veqqUgtDM1oKVwObCsvLgfURMQdYn5aRNBdYCMwDLgZWSepoQn5mZu3h6qvhjTcOjb3xRhYvSaVFQdJ04D8BXyuEFwC9ab4XuLQQXxMReyPiWWAzML/K/MzM2srLL48u3oCqWwp/BHweOFCInRkROwDS9IwUnwY8X1hvW4odQtISSf2S+nft2lVN1mZm41RlRUHSh4CdEfFQvZvUiMVhgYjVEdEdEd2dnZ1HlKOZWVs57bTRxRswYlGQdLWkk5W5SdLDki6sY9/nAx+R9BywBni/pL8CXpQ0Ne17KrAzrb8NmFHYfjqwfRR/i5nZ0e2GG2DSpENjkyZl8ZLU01K4KiJeBS4EOoErgWtH2igiromI6RHRRdaB/A8R8UlgHbAorbYIuCPNrwMWSjpW0mxgDvDAaP4YM7OjWk8P3HILzJoFUja95ZYsXpKJdawzcFrnEuCWiHhUUq1TPfW6FlgraTGwFbgMICI2SloLPAnsA5ZFxP4jOI6Z2dGnp6fUIjCYIg47bX/oCtItZB2+s4F3AB3APRHxrsqyqlN3d3f09/ePdRpmZm1F0kMR0V3rvXpaCouBc4BnImKPpNPITiGZmdlRZsQ+hYg4QHY6572SPga8D/j5qhMzM7Mali6FiROzPoWJE7PlEo3YUpB0M/B2YCMH7zcI4BulZmJmZsNbuhRuvPHg8v79B5dXrSrlEPX0KTwZEXNLOVrJ3KdgZuPKxIlZIRisowP27at7N8P1KdRzSep9aVwiMzMbS7UKwnDxBtTT0dxLVhheAPaSXaIaEfH20rIwM7ORdXQM3VIoST1F4WbgCuBxDh3DyMzMmmnJkkP7FIrxktRTFLZGxLrSjmhmZo0Z6ExevTprMXR0ZAWhpE5mqK+jeRXwJuBvyU4fARARY371kTuazcxG70hvXjuerBgUB8HzJalmZkehEYtCRPjuZTOzVtHXBytWwNatMHMmrFxZ6lhI9Qyd/WZJ6yU9kZbfLum3S8vAzMzq09eX9SEUn9G8ZEnTn9H8VeAa4GcAEfEY2VDYZmbWTCtWwJ49h8b27MniJamnKEyOiMHPNaj/1jkzMyvH1q2jizegnqLwkqSfIz0aU9LHgR2lZWBmZvWZOXN08QbUUxSWAV8B3irpJ8DngF8tLQMzM6vPJZeMLt6Aei5JjYj4gKQTgAkRsTs9LtPMzJrpzjtHF29APS2F2wEi4vWI2J1it5WWgZmZ1acJfQpDthQkvRWYB5ySHq4z4GTguNIyMDOz+sycmV2GWitekuFaCm8BPkQ2xMWHC69zgf9aWgZmZlaflSth8uRDY5MnZ/GSDNlSiIg7gDsknRcR95V2RDMza8zAncsV3tE85IB4kj4fEV+S9Ceky1GLIuKzpWXRIA+IZ2Y2eo0OiLcpTRv61ZV0HPA94Nh0nNsi4guSpgC3Al3Ac8B/joh/SdtcAywG9gOfjYhvN3JsMzNrzHCnj/42TXsb3Pde4P0R8ZqkScD3Jf0d8DFgfURcK2k5sBz4rfTIz4VkndtnAd+R9OaIKO85c2ZmNqx6LkltSGReS4uT0iuABWSP+CRNL03zC4A1EbE3Ip4FNgPzq8rPzMwOV1lRAJDUIWkDsBO4OyLuB86MiB0AaXpGWn0a8Hxh820pNnifSyT1S+rftWtXlembmY07lRaFiNgfEecA04H5kn5hmNVVaxc19rk6Irojoruzs7OsVM3M2sPSpTBxIkjZdOnSUnc/4jAXaUiLXyPrGM7Xj4iP1HuQiHhF0j3AxcCLkqZGxA5JU8laEZC1DGYUNpsObK/3GGZmR72lS+HGGw8u799/cLmk5zTX84zmR4GbgMeBAwPxiLh3hO06gZ+lgnA8cBdwHfA+4OVCR/OUiPi8pHnA18n6Ec4C1gNzhuto9iWpZjaudHTAgQOHxydMyApEnY70Gc3/FhF/XPfRDpoK9ErqIDtNtTYiviXpPmCtpMXAVuAygIjYKGkt8CTZ8xqW+cojM7OCWgVhuHgD6mkp/Aowh+x/+nsH4hHxcGlZNMgtBTMbV1Sr6zUZ4bf80N0cWUvhbcAVwPs5ePoo0rKZmTXLCSfA66/XjpeknqLwUeDsiHijtKOamdnofeUrcMUVh7YKpCxeknouSX2UbKRUMzM7ytXTp3AP8HbgQQ7tU6j7ktSquE/BzMaVk06C1147PH7iibB79+HxIRxpn8IX6j6SmZlVp1ZBGC7egBGLwkj3I5iZ2dGjnjuad3NwuIljyAa2ez0iTq4yMTMza756WgonFZclXYpHLzUzOyqNekC8iPg/+B4FM7OjUj2njz5WWJwAdFNj9FIzM6tYi9y89uHC/D6yR2guKC0DMzOrz549o4s3oJ4+hStLO5qZmTVu5kzYsqV2vCRDFgVJvzPMdhER/6u0LMzMbGQrV8JVV8EbhVGHjjkmi5dkuJZCjRNXnAAsBk4DXBTMzJpt8CgUoxgdtR4jDnMBIOkk4GqygrAW+IOI2Dn8VtXzMBdmNq50ddU+fTRrFjz3XN27aXiYC0lTgF8HeoBe4NyI+Je6j2xmZuWpVRCGizdguD6F64GPAauBt0VEeYNrmJnZ6E2YMPTjOMs6xDDv/QbZs5J/G9gu6dX02i3p1dIyMDOz+jThcZxDthQiorzSY2ZmbcE//GZm7WKoO5dLvKPZRcHMrF3s21f5IVwUzMzaQV8f7N1b+71a4yE1qLKiIGmGpO9K2iRpo6SrU3yKpLslPZ2mpxa2uUbSZklPSbqoqtzMzNrO1Vc35TBVthT2Ab8REf8OeDewTNJcYDmwPiLmAOvTMum9hcA84GJglaSOCvMzM2sfL7889HunnVbaYSorChGxIyIeTvO7gU3ANLIRVnvTar3ApWl+AbAmIvZGxLPAZvwwHzOzkd1wQ2m7akqfgqQu4J3A/cCZEbEDssIBnJFWmwY8X9hsW4oN3tcSSf2S+nft2lVl2mZm7aGnp7RdVV4UJJ0I3A58LiKGu+lNNWKHDcwUEasjojsiujs7O8tK08zMqLgoSJpEVhD6IuIbKfyipKnp/anAwMB624AZhc2nA9urzM/MrG2cddbo4g2q8uojATcBmyLiDwtvrQMWpflFwB2F+EJJx0qaDcwBHqgqPzOztvLCC6OLN6iex3E26nzgCuBxSRtS7H8A1wJrJS0GtgKXAUTERklrgSfJrlxaFhH7K8zPzKx9NGHcI6iwKETE96ndTwBwwRDbrATKe4SQmZmNiu9oNjOznIuCmZnlXBTMzNrBUA/SKfEBO+CiYGbWHprU0eyiYGZmORcFMzPLuSiYmVnORcHMzHIuCmZmlnNRMDOznIuCmZnlXBTMzCznomBm1g46hnhk/VDxBrkomJm1g/1DPElgqHiDXBTMzCznomBmZjkXBTMzy7komJlZzkXBzMxyLgpmZpZzUTAzs5yLgpmZ5SorCpJulrRT0hOF2BRJd0t6Ok1PLbx3jaTNkp6SdFFVeZmZ2dCqbCn8OXDxoNhyYH1EzAHWp2UkzQUWAvPSNqsklXvvtpmZjaiyohAR3wP+eVB4AdCb5nuBSwvxNRGxNyKeBTYD86vKzczMamt2n8KZEbEDIE3PSPFpwPOF9bal2GEkLZHUL6l/165dlSZrZjbetEpHs2rEotaKEbE6Irojoruzs7PitMzMxpdmF4UXJU0FSNOdKb4NmFFYbzqwvcm5mZm1plNPHXmdkjS7KKwDFqX5RcAdhfhCScdKmg3MAR5ocm5mZq3plVeadqiJVe1Y0l8D/xE4XdI24AvAtcBaSYuBrcBlABGxUdJa4ElgH7AsIsodJNzMzEZUWVGIiE8M8dYFQ6y/ElhZVT5mZkclP3nNzMxyvb0jrzMKLgpmZu2sp6fU3bkomJm1sr6+ph7ORcHMrJV98pNNPZyLgpmZ5VwUzMws56JgZmY5FwUzs1alWsPCFUTNIeKOiIuCmZnlXBTMzCznomBm1opGOnVUERcFM7N29OlPV7JbFwUzs1ZTTyth1apKDu2iYGbWSpo8rMVglQ2dbWZmo1RvP8IFNZ9AUAq3FMzMWsFoOpa/853K0nBRMDMbS5Mnj64gHH98dbng00dmZmOj0UtO9+wpN49BXBTMzJqhjPsOKhjWYjCfPjIzK8O8edkP/1CvNuGWgplZLR0dcODAWGdxUBNaCeCiYGZjbfJk+OlPxzqL1tWkYjCg5YqCpIuBG4AO4GsRcW0FByl9l2ZmpWtyQYAW61OQ1AH8GfBBYC7wCUlzSz5IqbszMytdxJgUBGixogDMBzZHxDMR8QawBlgwxjmZmTXHGBaDAa1WFKYBzxeWt6VYTtISSf2S+nft2tXU5MzMSjVQBFqgGAxotaJQ69zOIZ9URKyOiO6I6O7s7GxSWmZmDRj8o9+CRWCwVuto3gbMKCxPB7aPUS5mZodq0R/yMrVaUXgQmCNpNvATYCHwK6UeIcKdzWatbhz8+LaqlioKEbFP0meAb5NdknpzRGys4ECl79LM7GjQUkUBICLuBO4c6zzMzMajVutoNjOzMeSiYGZmORcFMzPLuSiYmVlO0cZX4kjaBWw5gl2cDrxUUjrN5Lyby3k3l/Ou3qyIqHn3b1sXhSMlqT8iusc6j9Fy3s3lvJvLeY8tnz4yM7Oci4KZmeXGe1FYPdYJNMh5N5fzbi7nPYbGdZ+CmZkdary3FMzMrMBFwczMcuOyKEi6WNJTkjZLWt4C+cyQ9F1JmyRtlHR1in9R0k8kbUivSwrbXJPyf0rSRYX4uyQ9nt77Y6naccIlPZeOt0FSf4pNkXS3pKfT9NRWylvSWwqf6QZJr0r6XCt+3pJulrRT0hOFWGmfr6RjJd2a4vdL6qow7+sl/VDSY5K+KelNKd4l6aeFz/3LLZZ3ad+LqvIuVUSMqxfZkNw/Bs4GjgEeBeaOcU5TgXPT/EnAj4C5wBeB36yx/tyU97HA7PT3dKT3HgDOI3uK3d8BH6w49+eA0wfFvgQsT/PLgetaLe9B34cXgFmt+HkD7wXOBZ6o4vMFlgJfTvMLgVsrzPtCYGKav66Qd1dxvUH7aYW8S/teVJV3ma/x2FKYD2yOiGci4g1gDbBgLBOKiB0R8XCa3w1sYtCzqQdZAKyJiL0R8SywGZgvaSpwckTcF9m37i+ASytOf6j8etN8byGHVsz7AuDHETHcnfFjlndEfA/45xr5lPX5Fvd1G3BBGa2dWnlHxF0RsS8t/hPZkxWH1Cp5D6NlPu8yjceiMA14vrC8jeF/gJsqNSffCdyfQp9Jze2bC6cJhvobpqX5wfEqBXCXpIckLUmxMyNiB2QFDzgjxVsp7wELgb8uLLf65w3lfr75NukH+1+B0yrL/KCryP4HPWC2pEck3SvpFwu5tUreZX0vxurzrtt4LAq1qnJLXJcr6UTgduBzEfEqcCPwc8A5wA7gDwZWrbF5DBOv0vkRcS7wQWCZpPcOs24r5Y2kY4CPAH+TQu3weQ+nkTyb/jdIWgHsA/pSaAcwMyLeCfw68HVJJ4+QWzPzLvN70WrfmcOMx6KwDZhRWJ4ObB+jXHKSJpEVhL6I+AZARLwYEfsj4gDwVbJTXzD037CNQ5vklf9tEbE9TXcC30w5vpia0AOnAHa2Wt7JB4GHI+JFaI/POynz8823kTQROIX6T5+MmqRFwIeAnnRqhXT65eU0/xDZufk3t0reJX8vmvp5N2I8FoUHgTmSZqf/KS4E1o1lQumc4k3Apoj4w0J8amG1jwIDV0SsAxamKxlmA3OAB9KphN2S3p32+V+AOyrM+wRJJw3Mk3UkPpHyW5RWW1TIoSXyLvgEhVNHrf55F5T5+Rb39XHgHwZ+rMsm6WLgt4CPRMSeQrxTUkeaPzvl/UwL5V3m96JpeTdsrHu6x+IFXEJ2hc+PgRUtkM97yJqQjwEb0usS4C+Bx1N8HTC1sM2KlP9TFK54AbrJvrQ/Bv6UdNd6RXmfTXb1xaPAxoHPkuwc6Xrg6TSd0kp5p+NNBl4GTinEWu7zJitaO4Cfkf0vc3GZny9wHNnps81kV8ycXWHem8nOpw98xweuwvnl9P15FHgY+HCL5V3a96KqvMt8eZgLMzPLjcfTR2ZmNgQXBTMzy7komJlZzkXBzMxyLgpmZpZzUbC2J2mFstFlH0ujWP77UW7/KUlnjXKbruJImoPeuz7lc/1o9pm2Pac4CqdZs00c6wTMjoSk88jukD03IvZKOp1s9Nt6t+8APkV2TXlZdyP/N6AzIvY2sO05ZNe431nvBukGKUV2x63ZEXFLwdrdVOClgR/giHgp0tAbki5Ig6w9ngYyOzbFn5P0O5K+T3ZXczfQl1oZx6ex8O9Ng/x9uzCkxLskPSrpPmBZrWQkrQNOAO6XdHm6W/d2SQ+m1/lpvfmSfpDy+4GyZzwcA/wucHnK5XJlY/n/ZmH/T6RWSpey52+sIrvha4ak/56O8Zik/5nWP0HS/015PyHp8gr+DexoMtZ3z/nl15G8gBPJ7o79EbAKeF+KH0d29+yb0/JfkA00CNkzID5f2Mc9QHeanwT8gOx/+gCXAzen+ccK+7+eoZ8B8Fph/uvAe9L8TLKhTABO5uCzBT4A3J7mPwX8aWH7L1IYy5+sRdOVXgeAd6f4hWQPjhfZf/a+RfZsgF8GvlrY/pRaOfvl18DLp4+srUXEa5LeBfwi8EvArcqepvcI8GxE/Cit2kv2v/s/Ssu3DrHLtwC/ANydnZWhA9gh6RTgTRFxb1rvL8kG1BvJB4C5Ojhk/slpvKhTgF5Jc8iGOJlUz987yJaI+Kc0f2F6PZKWTyQbi+cfgd+XdB3wrYj4xwaOY+OIi4K1vYjYT/a//XskPU424NiGETZ7fYi4gI0Rcd4hwezRkY2MCTMBOC8ifjpof38CfDciPqrsGRr3DLH9Pg49zXtcYb74Nwj4vYj4yuAdpKJ5CfB7ku6KiN8d7R9h44f7FKytpXPxcwqhc4AtwA+BLkk/n+JXAPcO3j7ZTfYYVMgGNutMHdhImiRpXkS8AvyrpPek9XrqTPEu4DOFfM9Js6cAP0nznxoiF8hOdZ2btj2X7LGPtXwbuErZMzmQNE3SGemqqj0R8VfA7w/sy2woLgrW7k4kOw3zpKTHSM+2joh/A64E/ia1Hg4AXx5iH38OfFnSBrLTRR8HrpP0KFmL4z+k9a4E/ix1NP+01o5q+CzQnTp/nwR+NcW/RPY/9/+Xjjngu2SnmzakTuHbgSkpt0+T9Z0cJiLuIuu/uC/9vbeRFZe3AQ+k7VcA/7vOvG2c8iipZmaWc0vBzMxyLgpmZpZzUTAzs5yLgpmZ5VwUzMws56JgZmY5FwUzM8v9f+soTG06Cut8AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pyplot.plot(items_per_feature, 'ro')\n",
    "pyplot.ylabel('Num items ')\n",
    "pyplot.xlabel('Sorted features')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# We can now build the recommender algorithm, but first we need the train/test split and the evaluation function:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: 1689 (2.36 %) of 71567 users have no train items\n",
      "Warning: 1763 (2.46 %) of 71567 users have no sampled items\n",
      "Warning: 1689 (2.36 %) of 71567 users have no train items\n",
      "Warning: 1929 (2.70 %) of 71567 users have no sampled items\n"
     ]
    }
   ],
   "source": [
    "from Base.Evaluation.Evaluator import EvaluatorHoldout\n",
    "from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample\n",
    "\n",
    "URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)\n",
    "URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.80)\n",
    "\n",
    "evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])\n",
    "evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Base.Similarity.Compute_Similarity_Python import Compute_Similarity_Python\n",
    "\n",
    "class ItemKNNCBFRecommender(object):\n",
    "    \n",
    "    def __init__(self, URM, ICM):\n",
    "        self.URM = URM\n",
    "        self.ICM = ICM\n",
    "        \n",
    "            \n",
    "    def fit(self, topK=50, shrink=100, normalize = True, similarity = \"cosine\"):\n",
    "        \n",
    "        similarity_object = Compute_Similarity_Python(self.ICM.T, shrink=shrink, \n",
    "                                                  topK=topK, normalize=normalize, \n",
    "                                                  similarity = similarity)\n",
    "        \n",
    "        self.W_sparse = similarity_object.compute_similarity()\n",
    "\n",
    "        \n",
    "    def recommend(self, user_id, at=None, exclude_seen=True):\n",
    "        # compute the scores using the dot product\n",
    "        user_profile = self.URM[user_id]\n",
    "        scores = user_profile.dot(self.W_sparse).toarray().ravel()\n",
    "\n",
    "        if exclude_seen:\n",
    "            scores = self.filter_seen(user_id, scores)\n",
    "\n",
    "        # rank items\n",
    "        ranking = scores.argsort()[::-1]\n",
    "            \n",
    "        return ranking[:at]\n",
    "    \n",
    "    \n",
    "    def filter_seen(self, user_id, scores):\n",
    "\n",
    "        start_pos = self.URM.indptr[user_id]\n",
    "        end_pos = self.URM.indptr[user_id+1]\n",
    "\n",
    "        user_profile = self.URM.indices[start_pos:end_pos]\n",
    "        \n",
    "        scores[user_profile] = -np.inf\n",
    "\n",
    "        return scores"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### We need to define Cosine similarity... Let's look at the attached source code"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See also a [list of commonly used KNN similarity heuristics](https://github.com/MaurizioFD/RecSys_Course_2018/blob/master/slides/List_of_KNN_similarity_heuristics.pdf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A KNN is built with the following steps:\n",
    "* Compute the similarity of an item with all others\n",
    "* Select the k-highest similarities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_id = 80\n",
    "shrink = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The numerator is the dot product of the item features times the whole ICM data transposed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2., 0., 0., ..., 0., 1., 0.])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numerator_vector = ICM_all[item_id].dot(ICM_all.T).toarray().ravel()\n",
    "numerator_vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1.73205081, 4.35889894, 5.29150262, ..., 1.        , 1.41421356,\n",
       "       1.41421356])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "item_norms = np.sqrt(np.array(ICM_all.T.power(2).sum(axis=0))).ravel()\n",
    "item_norms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The denominator will be the product of norms plus the srink term and a small value which prevents the denominator to be zero (only for non-negative data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([15.47722658, 23.78404975, 26.73320153, ..., 13.16227866,\n",
       "       14.47213695, 14.47213695])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "denominator_vector = item_norms[item_id] * item_norms + shrink + 1e-6\n",
    "denominator_vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "similarity_vector = numerator_vector/denominator_vector"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's sort the similarity from the highest to the lowest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([   80,  3542,     0, ...,  3938,  3931, 10680], dtype=int64)"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted_item_indices = np.argsort(-similarity_vector)\n",
    "sorted_item_indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUHklEQVR4nO3df5BvdX3f8eeLxauiIlG2lXKBe42k9jqjDd1QMNaKFgPWEdMyE8xStUl7SwymxmYMDjPpOC1TU5OMxlBubgiR6o3EqLE3FIutydBUq97FIAJy9YoBNpiy/hhRr+Pl4rt/nLP6Zfnu7nfXPfu9+z3Px8zO95zPOef7fX9m7/2+9pzzOeekqpAk9ddx4y5AkjReBoEk9ZxBIEk9ZxBIUs8ZBJLUc8ePu4C1Ovnkk2vHjh3jLkOStpRbb731K1U1PWzZlguCHTt2MDc3N+4yJGlLSXLvcss8NCRJPWcQSFLPGQSS1HMGgST1nEEgST3XaRAkuSDJwSSHklwxZPmLknwjyW3tz691Usi+fbBjBxx3XPO6b18nHyNJW1Fnw0eTTAFXA+cD88CBJPur6q4lq/5FVb28qzrYtw9274bDh5v5e+9t5gFmZzv7WEnaKrrcIzgbOFRV91TVEeAG4KIOP2+4K6/8QQgsOny4aZckdRoEpwL3D8zPt21LnZvkM0k+nOQ5w94oye4kc0nmFhYW1lbFffetrV2SeqbLIMiQtqVPwfk0cEZVPQ94J/ChYW9UVXuraqaqZqanh14hvbzTT19buyT1TJdBMA+cNjC/HXhgcIWqeqiqvtVO3wQ8LsnJG1rFVVfBCSc8uu2EE5p2SVKnQXAAODPJziTbgEuA/YMrJHlGkrTTZ7f1fHVDq5idhb174YwzIGle9+71RLEktTobNVRVR5NcDtwMTAHXVdWdSS5rl+8BLgZ+IclR4DvAJdXFQ5RnZ/3il6RlZKs9vH5mZqa8+6gkrU2SW6tqZtgyryyWpJ4zCCSp5wwCSeo5g0CSes4gkKSeMwgkqecMAknqOYNAknrOIJCknjMIJKnnDAJJ6jmDQJJ6ziCQpJ4zCCSp5wwCSeo5g0CSes4gkKSeMwgkqecMAknqOYNAknrOIJCknjMIJKnnDAJJ6jmDQJJ6ziCQpJ4zCCSp5wwCSeo5g0CSes4gkKSeMwgkqecMAknquU6DIMkFSQ4mOZTkihXW+4kkjyS5uMt6JEmP1VkQJJkCrgYuBHYBr0qya5n1fh24uataJEnL63KP4GzgUFXdU1VHgBuAi4as93rgA8CDHdYiSVpGl0FwKnD/wPx82/Z9SU4FfhrYs9IbJdmdZC7J3MLCwoYXKkl91mUQZEhbLZl/O/CrVfXISm9UVXuraqaqZqanpzesQEkSHN/he88Dpw3MbwceWLLODHBDEoCTgZclOVpVH+qwLknSgC6D4ABwZpKdwF8DlwA/O7hCVe1cnE7yLuBGQ0CSNldnQVBVR5NcTjMaaAq4rqruTHJZu3zF8wKSpM3R5R4BVXUTcNOStqEBUFWv7bIWSdJwXlksST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs8ZBJLUcwaBJPWcQSBJPWcQSFLPGQSS1HMGgST1nEEgST1nEEhSzxkEktRzBoEk9ZxBIEk9ZxBIUs91GgRJLkhyMMmhJFcMWX5RktuT3JZkLskLuqxHkvRYx3f1xkmmgKuB84F54ECS/VV118BqHwX2V1UleS7wPuDZXdUkSXqskfYI2i/1tTobOFRV91TVEeAG4KLBFarqW1VV7eyTgEKStKlGPTR0KMnbkuxaw3ufCtw/MD/ftj1Kkp9Ocjfw34GfG/ZGSXa3h47mFhYW1lCCJGk1owbBc4HPA9cm+UT7xXziKttkSNtj/uKvqj+pqmcDrwT+w7A3qqq9VTVTVTPT09MjlixJGsVIQVBV36yq36uq5wNvAv498OUk1yd51jKbzQOnDcxvBx5Y4TP+N/CjSU4erXRJ0kYY+RxBklck+RPgHcBvAs8E/hS4aZnNDgBnJtmZZBtwCbB/yfs+K0na6bOAbcBX19UTSdK6jDpq6AvAnwNvq6qPD7S/P8kLh21QVUeTXA7cDEwB11XVnUkua5fvAf458OokDwPfAX5m4OSxJGkTZJTv3SQvqKr/s6TtJ6vqY51VtoyZmZmam5vb7I+VpC0tya1VNTNs2agni397SNs711+SJOlYseKhoSTnAs8HppO8cWDRiTSHeyRJW9xq5wi2AU9u13vKQPtDwMVdFSVJ2jwrBkFV3QLckuRdVXXvJtUkSdpEqx0aentVvQH4nSTDLgZ7RWeVSZI2xWqHht7dvv5G14VIksZjtUNDt7Y3nPvXVXXpJtUkSdpEqw4frapHaEYNbduEeiRJm2zUK4v/CvhYkv3Atxcbq+q3uihKkrR5Rg2CB9qf43j0MFJJ0hY3UhBU1Vu6LkSSNB4jBUGSaZrbTz8HeMJie1W9uKO6JEmbZNR7De0D7gZ2Am+hOWdwoKOaJEmbaNQgeHpV/T7wcFXdUlU/B5zTYV2SpE0y6snih9vXLyf5pzQnjrd3U5IkaTONGgT/MclTgX9Hc/vpE4Ff7qwqSdKmGXXU0I3t5DeA87orR5K02Va76dw7gWUfYVZVv7ThFUmSNtVqewQ+E1KSJtxqN527frMKkSSNx0jPI0jypww5ROTzCCRp6/N5BJLUc6s+j6B9vWVzypEkbbaRrixO8vIkf5nka0keSvLNJA91XZwkqXujXlD2duCfAZ+tqmWHk0qStp5R7zV0P3CHISBJk2fUPYI3ATcluQX47mKjTyiTpK1v1CC4CvgWzbMIfHaxJE2QUYPgaVX10k4rkSSNxajnCP5XEoNAkibQqEHwi8D/SPIdh49K0mQZKQiq6ilVdVxVPbGqTmznT1xtuyQXJDmY5FCSK4Ysn01ye/vz8STPW08nJEnrt9q9hp5dVXcnOWvY8qr69ArbTgFXA+cD88CBJPur6q6B1b4E/OOq+nqSC4G9wD9cayckSeu32sniNwK7gd8caBu8luDFK2x7NnCoqu4BSHIDcBHw/SCoqo8PrP8JfPylJG261Q4NXZvkGVV1XlWdB7yLZhjpHcDFq2x7Ks2FaIvm27bl/Dzw4WELkuxOMpdkbmFhYZWPlSStxWpBsAc4ApDkhcB/Aq6neWTl3lW2zZC2oVcmJzmPJgh+ddjyqtpbVTNVNTM9Pb3Kx0qS1mK1Q0NTVfW1dvpngL1V9QHgA0luW2XbeeC0gfntwANLV0ryXOBa4MKq+upoZUuSNspqewRTSRbD4iXAnw0sWy1EDgBnJtmZZBtwCbB/cIUkpwMfBP5FVX1+9LIlSRtltS/z9wK3JPkK8B3gLwCSPIvm8NCyqupoksuBm4Ep4LqqujPJZe3yPcCvAU8H/ksSgKNVNfND9EeStEZZ7YaiSc4BTgE+UlXfbtt+DHjySsNHuzIzM1Nzc3Ob/bGStKUluXW5P7RXvddQVX1iSJuHcSRpQox6iwlJ0oQyCCSp5wwCSeo5g0CSes4gkKSeMwgkqecMAknqOYNAknrOIJCknjMIJKnnDAJJ6jmDQJJ6rh9BsG8f7NgBxx3XvO7bN+6KJOmYserdR7e8fftg9244fLiZv/feZh5gdnZ8dUnSMWLy9wiuvPIHIbDo8OGmXZLUgyC47761tUtSz0x+EJx++traJalnJj8IrroKTjjh0W0nnNC0S5J6EASzs/Ca18DUVDM/NdXMe6JYkoA+BMG+fXD99fDII838I4808w4hlSSgD0HgqCFJWtHkB8Fyo4PuvXdz65CkY9TkB8Fyo4MSDw9JEn0Igquuar70l6ry8JAk0YcgmJ1tvvSH8fCQJPUgCCRJKzIIJKnnDAJJ6jmDQJJ6rtMgSHJBkoNJDiW5YsjyZyf5v0m+m+RXuqxFkjRcZw+mSTIFXA2cD8wDB5Lsr6q7Blb7GvBLwCu7qkOStLIu9wjOBg5V1T1VdQS4AbhocIWqerCqDgAPd1iHJGkFXQbBqcD9A/PzbduaJdmdZC7J3MLCwoYUJ0lqdBkEQy7nZZkru1ZWVXuraqaqZqanp3/IsiRJg7oMgnngtIH57cADHX6eJGkdugyCA8CZSXYm2QZcAuzv8PMkSevQ2aihqjqa5HLgZmAKuK6q7kxyWbt8T5JnAHPAicD3krwB2FVVD3VVlyTp0ToLAoCqugm4aUnbnoHpv6E5ZCRJGhOvLJaknjMIJKnnDIJhD62RpB4xCCSp5wwCcK9AUq8ZBIsMA0k91Y8gOOmk0dYzDCT1UD+C4OtfH3cFknTM6kcQrIV7BZJ6pj9BsGvX6Osmzc/rXtddPZJ0jOhPENx559rCAOCaawwDSROvP0EATRjUGh+JcM01Hi6SNNH6FQSLnvjEtW9jGEiaUP0MgsOH4XGPW/t2i+cOJGmC9DMIAI4cgfe8Z33bGgaSJkh/gwBgdnbt5wwWJbBv38bWI0lj0O8gWFS19hFFAJde6t6BpC3PIFi0nuGliwwDSVuYQTBoPcNLFxkGkrYog2CYqvWdSPacgaQtyCBYznpOJF96aTe1SFKHDILVrDUMnvOcbuqQpI4YBKOoGj0Q7rqr21okaYMZBGux3hPJknQMMwjWyjCQNGEMgvVYz32KJOkYZRCsx5Ej465AkjaMQbBey11A5oVlkrYYg2C93v3utbVL0jHKIFiv2dnm6uMzzmj2As44o5mfnR13ZZK0JsePu4AtbXbWL35JW16nQZDkAuAdwBRwbVW9dcnytMtfBhwGXltVn+6ypg3l+QBJ47KBQ9k7OzSUZAq4GrgQ2AW8KsnS+zxfCJzZ/uwGrumqng1nCEgapw38DuryHMHZwKGquqeqjgA3ABctWeci4L9W4xPASUlO6bAmSdISXQbBqcD9A/Pzbdta1yHJ7iRzSeYWFhY2vFBJ6rMug2DYfsvSg1qjrENV7a2qmaqamZ6e3pDiJEmNLoNgHjhtYH478MA61pEkdajLIDgAnJlkZ5JtwCXA/iXr7AdencY5wDeq6ssd1rRxvPmcpHHawO+gzoaPVtXRJJcDN9MMH72uqu5Mclm7fA9wE83Q0UM0w0f/ZVf1dMIwkDQBOr2OoKpuovmyH2zbMzBdwC92WYMkaWXeYkKSes4gkKSeMwgkqecMAknqudQWG/mSZAG4d52bnwx8ZQPLORbZx8lgHyfDsdTHM6pq6BW5Wy4IfhhJ5qpqZtx1dMk+Tgb7OBm2Sh89NCRJPWcQSFLP9S0I9o67gE1gHyeDfZwMW6KPvTpHIEl6rL7tEUiSljAIJKnnehMESS5IcjDJoSRXjLueUSU5LcmfJ/lckjuT/Nu2/WlJ/meSL7SvPzKwzZvbfh5M8lMD7f8gyWfbZb+dHFsPXk4yleQvk9zYzk9UH5OclOT9Se5uf5/nTmAff7n9d3pHkvcmecIk9DHJdUkeTHLHQNuG9SvJ45P8Udv+ySQ7NrN/VNXE/9DcBvuLwDOBbcBngF3jrmvE2k8BzmqnnwJ8HtgF/Gfgirb9CuDX2+ldbf8eD+xs+z3VLvsUcC7Nk+E+DFw47v4t6esbgT8EbmznJ6qPwPXAv2qntwEnTVIfaR4z+yXgie38+4DXTkIfgRcCZwF3DLRtWL+A1wF72ulLgD/a1P6N+x/PJv0SzwVuHph/M/Dmcde1zr78N+B84CBwStt2CnBwWN9ongdxbrvO3QPtrwJ+d9z9GahnO/BR4MUDQTAxfQRObL8ks6R9kvq4+Azyp9Hc4v5G4KWT0kdgx5Ig2LB+La7TTh9PczVyuurL0p++HBpa/Ae6aL5t21La3cUfBz4J/O1qn+bWvv6tdrXl+npqO720/VjxduBNwPcG2iapj88EFoA/aA9/XZvkSUxQH6vqr4HfAO4DvkzzxMGPMEF9XGIj+/X9barqKPAN4OmdVb5EX4Jg2PHFLTVuNsmTgQ8Ab6iqh1ZadUhbrdA+dkleDjxYVbeOusmQtmO6jzR/5Z0FXFNVPw58m+ZwwnK2XB/bY+QX0RwO+TvAk5JcutImQ9qO6T6OaD39Gmuf+xIE88BpA/PbgQfGVMuaJXkcTQjsq6oPts3/L8kp7fJTgAfb9uX6Ot9OL20/Fvwk8IokfwXcALw4yXuYrD7OA/NV9cl2/v00wTBJffwnwJeqaqGqHgY+CDyfyerjoI3s1/e3SXI88FTga51VvkRfguAAcGaSnUm20ZyM2T/mmkbSjir4feBzVfVbA4v2A69pp19Dc+5gsf2SdhTCTuBM4FPtrus3k5zTvuerB7YZq6p6c1Vtr6odNL+bP6uqS5msPv4NcH+Sv9s2vQS4iwnqI80hoXOSnNDW9hLgc0xWHwdtZL8G3+timv8Dm7cXNO4TMJt4oudlNCNuvghcOe561lD3C2h2EW8Hbmt/XkZz/PCjwBfa16cNbHNl28+DDIy2AGaAO9plv8MmnoxaQ39fxA9OFk9UH4G/D8y1v8sPAT8ygX18C3B3W9+7aUbObPk+Au+lOe/xMM1f7z+/kf0CngD8MXCIZmTRMzezf95iQpJ6ri+HhiRJyzAIJKnnDAJJ6jmDQJJ6ziCQpJ4zCKRVJPlW+7ojyc+Oux5poxkE0uh2AAaBJo5BII3urcA/SnJbe9/9qSRvS3Igye1J/g1AkhcluSXJ+5J8Pslbk8wm+VR7L/ofHXM/pEc5ftwFSFvIFcCvVNXLAZLsprnD5k8keTzwsSQfadd9HvD3aO4Xcw9wbVWdnebBQq8H3rD55UvDGQTS+r0UeG6Si9v5p9LcV+YIcKDaWxQn+SKwGBCfBc7b7EKllRgE0voFeH1V3fyoxuRFwHcHmr43MP89/H+nY4znCKTRfZPmcaGLbgZ+ob1NOEl+rH3YjLSl+JeJNLrbgaNJPgO8C3gHzUiiT7e3FV4AXjm26qR18u6jktRzHhqSpJ4zCCSp5wwCSeo5g0CSes4gkKSeMwgkqecMAknquf8PoWomP//H4GcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pyplot.plot(similarity_vector[sorted_item_indices], 'ro')\n",
    "pyplot.ylabel('Similarity')\n",
    "pyplot.xlabel('Item')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we select the k most similar items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "k = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATyUlEQVR4nO3df5BdZ33f8fdHMiYVwWlsKYVKtqQkbojpmIbZeICkFENIbOJBSeuZCGTiJpmqJjEJIRnijDvpZFJPw0AyptSJR3VcPKDBZfhVhTgxLck4KZREa+IYG7ARBsmL3XptpxgiBlv42z/u2fh6fVd7V9qzV3uf92tm557znKN7v89Iup8953nOOakqJEnt2jDpAiRJk2UQSFLjDAJJapxBIEmNMwgkqXGnTbqAldq8eXPt2LFj0mVI0rpy++23P1xVW0ZtW3dBsGPHDmZnZyddhiStK0kOL7XNU0OS1DiDQJIaZxBIUuMMAklqnEEgSY3rNQiSXJTkniSHklw1Yvsrknw1yR3dz2/0Usj+/bBjB2zYMHjdv7+Xj5Gk9ai36aNJNgLXAa8G5oCDSQ5U1WcX7foXVXVJX3Wwfz/s3QtHjw7WDx8erAPs2dPbx0rSetHnEcEFwKGquq+qHgduBnb1+HmjXX31UyGw4OjRQbskqdcg2ArcP7Q+17Ut9tIkf5Pkj5O8cNQbJdmbZDbJ7Pz8/MqqOHJkZe2S1Jg+gyAj2hY/BefTwPaqehHwLuAjo96oqvZV1UxVzWzZMvIK6aWdc87K2iWpMX0GwRxw9tD6NuCB4R2q6rGq+nq3fAvwrCSbV7WKa66BTZue3rZp06BdktRrEBwEzk2yM8npwG7gwPAOSZ6XJN3yBV09j6xqFXv2wL59sH07JIPXffscKJakTm+zhqrqWJIrgVuBjcCNVXV3kiu67dcDlwJvTHIM+Aawu/p4iPKePX7xS9ISst4eXj8zM1PefVSSVibJ7VU1M2qbVxZLUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXEGgSQ1rtcgSHJRknuSHEpy1XH2+8Ek30pyaZ/1SJKeqbcgSLIRuA64GDgPeF2S85bY723ArX3VIklaWp9HBBcAh6rqvqp6HLgZ2DVivzcBHwQe6rEWSdIS+gyCrcD9Q+tzXdvfS7IV+Eng+uO9UZK9SWaTzM7Pz696oZLUsj6DICPaatH6tcCvVdW3jvdGVbWvqmaqambLli2rVqAkCU7r8b3ngLOH1rcBDyzaZwa4OQnAZuA1SY5V1Ud6rEuSNKTPIDgInJtkJ/AVYDfw+uEdqmrnwnKSdwMfNQQkaW31FgRVdSzJlQxmA20Ebqyqu5Nc0W0/7riAJGlt9HlEQFXdAtyyqG1kAFTVv+6zFknSaF5ZLEmNMwgkqXEGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLjDAJJapxBIEmNMwgkqXFjBUGSjX0XIkmajHGPCA4leXuS83qtRpK05sYNgvOBe4Ebknwqyd4kZ/RYlyRpjYwVBFX1tar6L1X1MuCtwL8HHkxyU5Lv7bVCSVKvxh4jSPLaJB8G3gn8DvDdwB8Ct/RYnySpZ6eNud8XgD8D3l5Vnxxq/0CSl69+WZKktTJuEPx0Vf2v4YYkP1RVn6iqX+yhLknSGhl3sPg/jWh712oWIkmajOMeESR5KfAyYEuStwxtOgPw2gJJmgLLnRo6Hfj2br/nDrU/BlzaV1GSpLVz3CCoqtuA25K8u6oOr1FNkqQ1dNwxgiTXdov/OcmBxT/LvXmSi5Lck+RQkqtGbN+V5M4kdySZTfLDJ9gPSdIJWu7U0Hu613es9I27+xNdB7wamAMOJjlQVZ8d2u3jwIGqqiTnA+8HXrDSz5IknbjlTg3d3n2h/5uqumyF730BcKiq7gNIcjOwC/j7IKiqrw/t/xygVvgZkqSTtOz00ar6FoNZQ6ev8L23AvcPrc91bU+T5CeTfB74I+BnR71Rd2+j2SSz8/PzKyxDknQ8415Q9mXgE924wN8tNFbV7x7nz2RE2zN+46+qDwMf7q5Q/i3gR0bssw/YBzAzM+NRgyStonGD4IHuZwNPn0Z6PHPA2UPr27r3GKmq/jzJ9yTZXFUPj/kZkqSTNFYQVNVvnsB7HwTOTbIT+AqwG3j98A7dnUu/2A0Wv5jBdQuPnMBnSZJO0FhBkGQLg9tPvxD4toX2qnrlUn+mqo4luRK4lcFVyDdW1d1Jrui2Xw/8K+CnkzwBfAP4qary1I8kraFxTw3tB/4bcAlwBXA5sOyobVXdwqLbVHcBsLD8NuBt4xYrSVp949507qyq+gPgiaq6rap+FnhJj3VJktbIuEcET3SvDyb5cQaDvtv6KUmStJbGDYL/kOQ7gF9hcPvpM4Bf7q0qSdKaGXfW0Ee7xa8CF/ZXjiRprS33PIJ3cZzbPvh0Mkla/5Y7IphdkyokSROz3E3nblqrQiRJk7HcqaFrq+rNSf6Q0fcJem1vlUmS1kRvzyOQJK0Pyz6PoHu9bW3KkSSttbGuLE5ySZK/TvJokseSfC3JY30XJ0nq37gXlF0L/EvgM94UTpKmy7j3GrofuMsQkKTpM+4RwVuBW5LcBnxzoXGZJ5RJktaBcYPgGuDrDJ5FsNJnF0uSTmHjBsGZVfWjvVYiSZqIcccI/mcSg0CSptC4QfALwJ8k+YbTRyVpuox7G+rn9l2IJGkylrvX0Auq6vNJXjxqe1V9up+yJElrZbkjgrcAe4HfGWobvpbglatekSRpTS03RnBDkudV1YVVdSHwbgbTSO8CLu27OElS/5YLguuBxwGSvBz4j8BNDB5Zua/f0iRJa2G5U0Mbq+rRbvmngH1V9UHgg0nu6Lc0SdJaWO6IYGOShbB4FfCnQ9vGvRhNknQKW+7L/H3AbUkeBr4B/AVAku9lcHpIkrTOLfdgmmuSfBx4PvCxobuPbgDe1HdxkqT+LXt6p6o+NaLt3n7KkSSttXFvMSFJmlIGgSQ1ziCQpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlxvQZBkouS3JPkUJKrRmzfk+TO7ueTSV7UZz2SpGfqLQiSbASuAy4GzgNel+S8Rbt9CfgXVXU+8Fv4jANJWnN9HhFcAByqqvuq6nHgZmDX8A5V9cmq+ttu9VPAth7rkSSN0GcQbAXuH1qf69qW8nPAH4/akGRvktkks/Pz86tYoiSpzyDIiLYa0UaSCxkEwa+N2l5V+6pqpqpmtmzZsoolSpL6fMrYHHD20Po24IHFOyU5H7gBuLiqHumxHknSCH0eERwEzk2yM8npwG7gwPAOSc4BPgS8wWccSNJk9HZEUFXHklwJ3ApsBG6sqruTXNFtvx74DeAs4PeSAByrqpm+apIkPVOeevrk+jAzM1Ozs7OTLkOS1pUkty/1i7ZXFktS4wwCSWqcQSBJjTMIJKlxBoEkNc4gkKTGGQSS1DiDQJIaZxBIUuMMAklqnEEgSY0zCCSpcQaBJDXOIJCkxhkEktQ4g0CSGmcQSFLj2guC/fthxw7YsGHwun//pCuSpInq7ZnFp6T9+2HvXjh6dLB++PBgHWDPnsnVJUkT1NYRwdVXPxUCC44eHbRLUqPaCoIjR1bWLkkNaCsIzjlnZe2S1IC2guCaa2DTpqe3bdo0aJekRrUVBHv2wL59sH07JIPXyy8fjBE4i0hSo9oKAhiEwZe/DE8+OTgSuOmmweyhqqdmERkGkhrSXhAMcxaRJDUeBM4ikqTGg2Cp2UJVjhdIakbbQTBqFtECxwskNaLtIBieRTTK0aNw2WUeHUiaam0HATw1iyhZeh+PDiRNMYNgwXJXFy8cHWzePPjZsGHpZY8gJK0jBsGC440XDHvkkcFP1dLLHkFIWkcMggXLjReslOMLktYJg2DYwnjBe9873tHBOA4fhje8YTAG4ekjSacgg2CU1T46qBq8evpI0inIIFhKH0cHi3k7C0mngF6DIMlFSe5JcijJVSO2vyDJ/07yzSS/2mctJ2zxHUvPOmvws9TySh0+7OwjSROVWjhtsdpvnGwE7gVeDcwBB4HXVdVnh/b5LmA78BPA31bVO5Z735mZmZqdne2l5lWx+LnIJ+tZz4IzzoBHHx1Mcb3mGp+vLGnFktxeVTOjtvV5RHABcKiq7quqx4GbgV3DO1TVQ1V1EHiixzrW1uLxheNdqDaOJ554+rjCqIFnjyIknYQ+g2ArcP/Q+lzXtmJJ9iaZTTI7Pz+/KsX1amF8oQre855nnlY6GaMGnhcPQjtLSdIK9BkEo34VPqHzUFW1r6pmqmpmy5YtJ1nWGht+EM7DDw9+Vms20lKWmqW0EBCGgqQhfQbBHHD20Po24IEeP2/9GPcq5tW2EBAeNUga0mcQHATOTbIzyenAbuBAj5+3fqxkJtLpp/dTg0cNkjq9BUFVHQOuBG4FPge8v6ruTnJFkisAkjwvyRzwFuDfJZlLckZfNZ1SRp0yGrV8442rN/A8juGjBi94k5rQ2/TRvpzy00f7tH//4AK0I0fgzDMHbY8++tTyI48MwmK1/04XBriHP8vprNK6Mqnpo1ptyx1FHG+W0skcTRxvhtLP/IzTWKV1ziCYNqPCYjggYHVPMQ1f5+A0VmldMghaMc61Das9BrHcgPQ4D/lZreUdO+Dnf37wuhafdyrWZwhrCY4R6CkLYxCHD0+6EvVlYQxpqXGfpZYdD1r3HCPQeNbijquarOWuTB/ninWPLKaOQaBnOhWuc9CpZ3hq8VKTBE6F5VP9NODJ1tpDEHtqSCdn1JTWvqaxShrYtGnwy9oKTtV5akj9WW6WUt8D0lKLVvmhVgaB+rGSgFiL5e3b4Y1vnMxnnwr1gSE8bY4cWbW3Om3V3kkax549zjyZlOWuTF9q2VN9p6Zzzlm1t/KIQGrFuPe3GueKdScJTNamTYPpvKvEIJC0vMUhsnAzxFPl1Np6Og14srVu377igeLleGpI0sp5im+qeEQgSY0zCCSpcQaBJDXOIJCkxhkEktS4dXevoSTzwIneJ3kz8PAqlrNetNjvFvsMbfa7xT7Dyvu9vaq2jNqw7oLgZCSZXeqmS9OsxX632Gdos98t9hlWt9+eGpKkxhkEktS41oJg36QLmJAW+91in6HNfrfYZ1jFfjc1RiBJeqbWjggkSYsYBJLUuGaCIMlFSe5JcijJVZOupw9Jzk7yZ0k+l+TuJL/UtZ+Z5H8k+UL3+p2TrnW1JdmY5K+TfLRbb6HP/zDJB5J8vvs7f2kj/f7l7t/3XUnel+Tbpq3fSW5M8lCSu4baluxjkl/vvtvuSfJjK/28JoIgyUbgOuBi4DzgdUnOm2xVvTgG/EpVfT/wEuAXun5eBXy8qs4FPt6tT5tfAj43tN5Cn98J/ElVvQB4EYP+T3W/k2wFfhGYqap/CmwEdjN9/X43cNGitpF97P6P7wZe2P2Z3+u+88bWRBAAFwCHquq+qnocuBnYNeGaVl1VPVhVn+6Wv8bgi2Erg77e1O12E/ATk6mwH0m2AT8O3DDUPO19PgN4OfAHAFX1eFX9P6a8353TgH+Q5DRgE/AAU9bvqvpz4NFFzUv1cRdwc1V9s6q+BBxi8J03tlaCYCtw/9D6XNc2tZLsAH4A+EvgH1XVgzAIC+C7JldZL64F3go8OdQ27X3+bmAe+K/dKbEbkjyHKe93VX0FeAdwBHgQ+GpVfYwp73dnqT6e9PdbK0GQEW1TO282ybcDHwTeXFWPTbqePiW5BHioqm6fdC1r7DTgxcDvV9UPAH/H+j8dsqzuvPguYCfwj4HnJLlsslVN3El/v7USBHPA2UPr2xgcTk6dJM9iEAL7q+pDXfP/TfL8bvvzgYcmVV8Pfgh4bZIvMzjl98ok72W6+wyDf9NzVfWX3foHGATDtPf7R4AvVdV8VT0BfAh4GdPfb1i6jyf9/dZKEBwEzk2yM8npDAZWDky4plWXJAzOGX+uqn53aNMB4PJu+XLgv691bX2pql+vqm1VtYPB3+ufVtVlTHGfAarq/wD3J/m+rulVwGeZ8n4zOCX0kiSbun/vr2IwFjbt/Yal+3gA2J3k2Ul2AucCf7Wid66qJn6A1wD3Al8Erp50PT318YcZHBLeCdzR/bwGOIvBLIMvdK9nTrrWnvr/CuCj3fLU9xn4Z8Bs9/f9EeA7G+n3bwKfB+4C3gM8e9r6DbyPwRjIEwx+4/+54/URuLr7brsHuHiln+ctJiSpca2cGpIkLcEgkKTGGQSS1DiDQJIaZxBIUuMMAmkZSb7eve5I8vpJ1yOtNoNAGt8OwCDQ1DEIpPH9NvDPk9zR3RN/Y5K3JzmY5M4k/xYgySuS3Jbk/UnuTfLbSfYk+askn0nyPRPuh/Q0p026AGkduQr41aq6BCDJXgZ3v/zBJM8GPpHkY92+LwK+n8GthO8DbqiqC7qHBb0JePPaly+NZhBIJ+5HgfOTXNqtfweD+7w8Dhys7pbBSb4ILATEZ4AL17pQ6XgMAunEBXhTVd36tMbkFcA3h5qeHFp/Ev/f6RTjGIE0vq8Bzx1avxV4Y3frb5L8k+7hMNK64m8m0vjuBI4l+RsGz5R9J4OZRJ/ubok8zzp/RKLa5N1HJalxnhqSpMYZBJLUOINAkhpnEEhS4wwCSWqcQSBJjTMIJKlx/x9008B9QOUHlgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pyplot.plot(similarity_vector[sorted_item_indices[0:k]], 'ro')\n",
    "pyplot.ylabel('Similarity')\n",
    "pyplot.xlabel('Item')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The operation is performed for all items.\n",
    "A simple strategy to perform it efficiently is to vectorize the most computationally intensive part, the dot product, on a group of items. The speedup can be of a factor of 10-100.\n",
    "This strategy is limited by the fact that the result of the dot product is a huge item-item dense similarity which likely does not fit in memory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "import traceback\n",
    "\n",
    "try:\n",
    "    numerator_matrix = ICM_all.dot(ICM_all.T).toarray()\n",
    "    \n",
    "except Exception as e:\n",
    "    traceback.print_exc()\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The solution is:\n",
    "* Compute the numerator a block of items at a time leveraging vectorization while not running out of memory\n",
    "* Extract the k-nn on those items\n",
    "* Built incrementally the sparse similarity matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100, 10681)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "block_size = 100\n",
    "\n",
    "numerator_block = ICM_all[0:block_size].dot(ICM_all.T).toarray()\n",
    "numerator_block.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's compare the speed to compute the dot product on the whole similarity of the two strategies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the similarity one item at a time runs at 925.00 items/sec\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "n_items = ICM_all.shape[0]\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "for n_item in range(n_items):\n",
    "    numerator_vector = ICM_all[item_id].dot(ICM_all.T).toarray().ravel()\n",
    "    \n",
    "end_time = time.time()\n",
    "\n",
    "print(\"Computing the similarity one item at a time runs at {:.2f} items/sec\".format(n_items/(end_time-start_time)))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the similarity in blocks of 100 items at a time runs at 24707.12 items/sec\n"
     ]
    }
   ],
   "source": [
    "n_items = ICM_all.shape[0]\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "block_size = 100\n",
    "blocks_start_positions = range(0, n_items, block_size)\n",
    "\n",
    "for start_pos in blocks_start_positions:\n",
    "    end_pos = min(start_pos + block_size, n_items)\n",
    "    \n",
    "    numerator_block = ICM_all[start_pos:end_pos].dot(ICM_all.T).toarray()\n",
    "    \n",
    "end_time = time.time()\n",
    "\n",
    "print(\"Computing the similarity in blocks of 100 items at a time runs at {:.2f} items/sec\".format(n_items/(end_time-start_time)))\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### In this case the vectorized implementation runs >50 times faster!\n",
    "\n",
    "#### Usually most of the speed gain comes with blocks of 100 or so items, depending on the system. Much higher than that tends to not be beneficial while requiring increasingly more memory."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now an example of something you should *never* do, nested loops to compute the similarity of each item without vectorization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the similarity with nested loops runs at 25.64 items/sec\n"
     ]
    }
   ],
   "source": [
    "n_items = 100\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "for n_item in range(n_items):\n",
    "    for second_item in range(n_items):\n",
    "        numerator_vector = ICM_all[item_id].dot(ICM_all[second_item].T)\n",
    "    \n",
    "end_time = time.time()\n",
    "\n",
    "print(\"Computing the similarity with nested loops runs at {:.2f} items/sec\".format(n_items/(end_time-start_time)))\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You see how incredibly slow nested loops are compared to a well vectorized implementation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test our CBF recommender:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Similarity column 10681 ( 100 % ), 3454.08 column/sec, elapsed time 0.05 min\n"
     ]
    }
   ],
   "source": [
    "recommender = ItemKNNCBFRecommender(URM_train, ICM_all)\n",
    "recommender.fit(shrink=0.0, topK=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4759  554  851   38 3256]\n",
      "[ 402  133    7 8913  192]\n",
      "[ 798 5138 5018 9277 5660]\n",
      "[8913  423  468 6833  175]\n",
      "[ 176  403 1184 1314    7]\n",
      "[ 37  19  24 798  60]\n",
      "[ 144 1139 1184  150 2207]\n",
      "[3736 1234 2065 2952 2093]\n",
      "[ 101  616 4584 1994  788]\n",
      "[1310 6578 7118 6791   26]\n"
     ]
    }
   ],
   "source": [
    "for user_id in range(10):\n",
    "    print(recommender.recommend(user_id, at=5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Let's talk about speed\n",
    "\n",
    "#### Time to compute recommendations for a fixed group of users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reasonable implementation speed is 533.75 usr/sec\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "\n",
    "n_users_to_test = 1000\n",
    "\n",
    "start_time = time.time()\n",
    "\n",
    "for user_id in range(n_users_to_test):\n",
    "    recommender.recommend(user_id, at=5)\n",
    "    \n",
    "end_time = time.time()\n",
    "\n",
    "print(\"Reasonable implementation speed is {:.2f} usr/sec\".format(n_users_to_test/(end_time-start_time)))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's add a common mistake.... a CSC URM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Similarity column 10681 ( 100 % ), 3753.99 column/sec, elapsed time 0.05 min\n"
     ]
    }
   ],
   "source": [
    "URM_train_csc = URM_train.tocsc()\n",
    "\n",
    "recommender = ItemKNNCBFRecommender(URM_train_csc, ICM_all)\n",
    "recommender.fit(shrink=0.0, topK=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Exception index 10691 is out of bounds for axis 0 with size 10681\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Traceback (most recent call last):\n",
      "  File \"<ipython-input-43-cd8883de5544>\", line 10, in <module>\n",
      "    recommender.recommend(user_id, at=5)\n",
      "  File \"<ipython-input-24-88972fee293e>\", line 25, in recommend\n",
      "    scores = self.filter_seen(user_id, scores)\n",
      "  File \"<ipython-input-24-88972fee293e>\", line 40, in filter_seen\n",
      "    scores[user_profile] = -np.inf\n",
      "IndexError: index 10691 is out of bounds for axis 0 with size 10681\n"
     ]
    }
   ],
   "source": [
    "import time, traceback\n",
    "\n",
    "try:\n",
    "\n",
    "    n_users_to_test = 1000\n",
    "\n",
    "    start_time = time.time()\n",
    "\n",
    "    for user_id in range(n_users_to_test):\n",
    "        recommender.recommend(user_id, at=5)\n",
    "\n",
    "    end_time = time.time()\n",
    "\n",
    "    print(\"Wrong implementation speed is {:.2f} usr/sec\".format(n_users_to_test/(end_time-start_time)))\n",
    "\n",
    "    \n",
    "except Exception as e:\n",
    "        \n",
    "    print(\"Exception {}\".format(str(e)))\n",
    "    traceback.print_exc()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hyperparameter tuning\n",
    "\n",
    "#### Once we have built our model we can play with its hyperparameters\n",
    "* Number of neighbors\n",
    "* Shrinkage\n",
    "* Similarity type"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Number of neighbors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 11635.25 column/sec, elapsed time 0.02 min\n",
      "EvaluatorHoldout: Processed 61000 ( 87.39% ) in 30.35 sec. Users per second: 2010\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 35.16 sec. Users per second: 1985\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 12027.79 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 57000 ( 81.66% ) in 30.15 sec. Users per second: 1891\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 38.47 sec. Users per second: 1814\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 11173.96 column/sec, elapsed time 0.02 min\n",
      "EvaluatorHoldout: Processed 45000 ( 64.47% ) in 30.59 sec. Users per second: 1471\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 46.93 sec. Users per second: 1487\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 11835.00 column/sec, elapsed time 0.02 min\n",
      "EvaluatorHoldout: Processed 41000 ( 58.74% ) in 30.09 sec. Users per second: 1362\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 50.11 sec. Users per second: 1393\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 12260.15 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 40000 ( 57.30% ) in 30.61 sec. Users per second: 1307\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 51.46 sec. Users per second: 1357\n"
     ]
    }
   ],
   "source": [
    "from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender\n",
    "\n",
    "x_tick = [10, 50, 100, 200, 500]\n",
    "MAP_per_k = []\n",
    "\n",
    "for topK in x_tick:\n",
    "    \n",
    "    recommender = ItemKNNCBFRecommender(URM_train, ICM_all)\n",
    "    recommender.fit(shrink=0.0, topK=topK)\n",
    "    \n",
    "    result_dict, _ = evaluator_test.evaluateRecommender(recommender)\n",
    "    \n",
    "    MAP_per_k.append(result_dict[10][\"MAP\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEGCAYAAABy53LJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXhc9X3v8fdXo82SJXmXd8sSNmAIGCObLWBoA4WQYpJLE5IngaRJjEnoTbeb0uYuze1tm6a3bZKneewYQmJCLoQsFDchC1kwO5IMxhhsg7EkLK9abNmWbMuSvvePOcIjWbY0ko7OaObzep55ZuacM6PvTw/Wh99ZvsfcHRERkcHKiroAEREZWxQcIiKSFAWHiIgkRcEhIiJJUXCIiEhSsqMuYDRMmTLFy8rKoi5DRGRM2bhxY5O7T+27PCOCo6ysjJqamqjLEBEZU8ysvr/l2lUlIiJJUXCIiEhSFBwiIpIUBYeIiCRFwSEiIklRcIiISFIUHCIikhQFR8Q21rfw/NtNUZchIjJoCo6IfemxLXzygWo21h+MuhQRkUFRcEToUHsH2/YdoaOrm1UPbWRv67GoSxIRGZCCI0LVdfFZxt/deiHHOrpY+eBGjp/sirgqEZGzU3BEqLquhdxYFn906Wy+9pHFbNnTyl/9eDO6na+IpDIFR4Reqm3h4jkl5OfEeN+iUv7yhnN5fNMevvX0zqhLExE5o1CDw8xuNLPtZrbDzO7tZ72Z2TeC9ZvNbEnCuglm9iMz22ZmW83simD5JDN70szeCp4nhjmGsLSd6OT13a0smz/p3WWfu7aCD1w0g3/6xTZ+u21/hNWJiJxZaMFhZjHgm8BNwCLgo2a2qM9mNwELgsdKYHXCuq8Dv3D384CLga3B8nuB37j7AuA3wfsx55V3DtHZ7SwtOxUcZsY/33YxF8ws5gsPb2LHgSMRVigi0r8wZxzLgB3uvtPdO4BHgBV9tlkBPOhxLwITzGyGmRUD1wDfBnD3Dnc/lPCZdcHrdcCtIY4hNFV1LWQZXDqv94RpXG6MtZ+oJC8ni8+sq6G1/WREFYqI9C/M4JgF7Ep43xAsG8w25UAj8B0ze8XM7jezwmCbUnffCxA8T+vvh5vZSjOrMbOaxsbG4Y9mhFXVNrNoZjFF+TmnrZs5YRxrPn4puw8d456HX6azqzuCCkVE+hdmcFg/y/qeLnSmbbKBJcBqd78EaCPJXVLuvtbdK929curU0+58GKmOzm5eeecQy8omn3GbyrJJ/J9bL+SZt5r4x59vG8XqRETOLszgaADmJLyfDewZ5DYNQIO7vxQs/xHxIAHYb2YzAILnAyNcd+he232IE53dLJt/9uP6H1k6l09eWca3n63lhzW7zrqtiMhoCTM4qoEFZjbfzHKB24H1fbZZD9wRnF11OdDq7nvdfR+wy8zODbb7feCNhM/cGby+E3g8xDGEoqo2fuFf4oHxM/nvN5/PVedM5kuPbVFbEhFJCaEFh7t3AvcAvyR+RtSj7v66ma0ys1XBZk8AO4EdwH3A5xK+4k+A75vZZmAx8A/B8q8A15vZW8D1wfsxpaq2mYqphUwenzfgttmxLP79o0uYMSGfVQ9tZF/r8VGoUETkzCwTrlKurKz0mpqaqMsAoKvbWfzlX/GBi2fyjx96z6A/9+b+I3zwm89RMW08j951Bfk5sRCrFBEBM9vo7pV9l+vK8VG2bd9hjpzoHPD4Rl8LS4v4+u2X8NputSURkWgpOEZZVW0LAMvmn/mMqjNRWxIRSQUKjlFWXdfCrAnjmDVh3JA+r7YkIhI1Bccocneqalt69adKVk9bkkUz1JZERKKh4BhFtU1tNB3tGFZwQLwtyX13qC2JiERDwTGKeo5vDOb6jYGoLYmIREXBMYqq6lqYXJhLxdTCgTcehMS2JF9RWxIRGSXZUReQSapqW1haNgmz/lp0Dc1Hls5l694j3P9sLefNKOa2S2eP2HeLiPRHM45RsufQMRoOHhv28Y3+9LQl+ZufvMbL76gtiYiES8ExSqrreq7fGPng6GlLMr0kn7u+p7YkIhIuBccoeam2hfF52Zw/oziU759YmMv9d1bSfqKTld+r4fjJrlB+joiIgmOUVNe2cOm8icSyRu74Rl8LS4v4mtqSiEjIFByjoKWtg7cOHA1lN1Vf1y8q5S+uX6i2JCISGgXHKAjz+EZ/Pn/dOe+2JfndtjF3nysRSXEKjlFQVdtCbnYWF80uGZWfl9iW5L8+/IrakojIiFJwjILquhYumTOBvOzRu4fGuNwYa4O2JJ99cKPakojIiFFwhOzoiU627G4dtd1UiWYFbUkaDrarLYmIjBgFR8herj9It4/e8Y2+Kssm8Xcr1JZEREaOWo6ErKq2hViWsWRucnf8G0m3L5vLtn1qSyIiI0MzjpBV1bVw4cxiCvOizegv3Xw+V1aoLYmIDJ+CI0QnOrvYtOvQiLRRH66cWBbf/JjakojI8Ck4QrS5oZWOzu7Ijm/0pbYkIjISFBwhGskbN42UhaVF/NtHFrO5oZV71ZZERIZAwRGil2pbWFg6nomFuVGX0ssNF0znL29YyH9s2sN9z6gtiYgkR8ERks6ubl6uP5hSs41En7/uHK5fVMq/PvkmLW0dUZcjImOIgiMkW/ce4eiJzpQ5vtGXmfHFPziX4ye7Wfd8XdTliMgYouAISdUoNzYcigWlRbzv/FLWvVBHe0dn1OWIyBih4AhJVW0zcyaNY0bJuKhLOau7ry3nUPtJflC9K+pSRGSMUHCEwN2prjvIsrLJUZcyoEvnTWJp2UTuf6aWk+plJSKDoOAIwduNR2lp62DZ/OjajCRj1fIKdh86xn++uifqUkRkDFBwhKCqNt7SY9n81J9xAFx37jTOLS3iWxt26roOERmQgiMEVbXNTBmfR9nkgqhLGZSsLOOu5eVs33+E323XHQNF5OwUHCGorjvIZfMnYWZRlzJof3jxTGZNGMfqp96OuhQRSXEKjhHWcLCd3YeOsbRsbBzf6JETy+LT751Pdd1BNta3RF2OiKQwBccI6+lPNVaObyS6fdkcJhTksPoptSERkTMLNTjM7EYz225mO8zs3n7Wm5l9I1i/2cyWJKyrM7PXzGyTmdUkLP9bM9sdLN9kZu8PcwzJqq5roSg/m3OnF0VdStIKcrO584oyfr11P2/tPxJ1OSKSokILDjOLAd8EbgIWAR81s0V9NrsJWBA8VgKr+6y/zt0Xu3tln+X/Fixf7O5PhFD+kL1U28LSsknEssbO8Y1Ed15ZRn5OFms2aNYhIv0Lc8axDNjh7jvdvQN4BFjRZ5sVwIMe9yIwwcxmhFhTqJqOnmBnY1vKNjYcjEmFudy+dC6Pb9rNnkPHoi5HRFJQmMExC0jsY9EQLBvsNg78ysw2mtnKPp+7J9i19YCZ9XsU2sxWmlmNmdU0NjYOfRRJqK5N/f5Ug/GZq+fjwLefrY26FBFJQWEGR3/7avpeXXa2ba5y9yXEd2d93syuCZavBiqAxcBe4F/6++HuvtbdK929curUqUkXPxRVdS3k52Txnlklo/LzwjJ7YgG3XDyTh6ve4VC7Wq6LSG9hBkcDMCfh/Wygb0+LM27j7j3PB4DHiO/6wt33u3uXu3cD9/UsTwVVtS1cMmciudlj/2S1u5aX097RxYMv1EddioikmDD/wlUDC8xsvpnlArcD6/tssx64Izi76nKg1d33mlmhmRUBmFkhcAOwJXifeAzkgz3Lo3b4+Em27j085ndT9ThvejG/d940vvt8Hcc6dG9yETkltOBw907gHuCXwFbgUXd/3cxWmdmqYLMngJ3ADuKzh88Fy0uBZ83sVaAK+Jm7/yJY99XgNN3NwHXAn4U1hmRsrD9It4/94xuJVi2voKWtgx9uVMt1ETklO8wvD06VfaLPsjUJrx34fD+f2wlcfIbv/MQIlzkiqmtbyM4yLpk7IepSRszSsoksmTuBtU/v5GPL5pIdG/u74ERk+PSXYIRU1bZw4awSCnJDzeJRZWasWl5Bw8Fj/Oy1vVGXIyIpQsExAo6f7OLVhkNclka7qXq87/xSzpk2njVquS4iAQXHCNi06xAnu3xMX/h3JllZxl3XlLN172E2vDk618OISGpTcIyAqtoWzEjL4ABYsXgWM0ry1XJdRAAFx4iormvh3NIiSgpyoi4lFLnZ8ZbrL9W28Mo7B6MuR0QipuAYps6ubjbWH0yr03D7c/uyuZSMy2HNBs06RDKdgmOYXt9zmPaOrrTdTdVjfF42d1wxj1+9sZ8dB45GXY6IREjBMUxVadLYcDDuvLKM3FgWa5/WrEMkkyk4hqmqroWyyQWUFudHXUropozP48OVc3jsld3saz0edTkiEhEFxzB0dzvVdS1pv5sq0cpryul2eOA5tVwXyVQKjmHY0XiUQ+0nM2I3VY85kwq4+T0z+P6L9bS2n4y6HBGJgIJjGF7KoOMbie5aXk5bRxcPvaSW6yKZSMExDFW1LZQW5zF3UkHUpYyqC2aWsHzhVL7zXC3HT6rlukimUXAMkbtTXRs/vmHW340M09uq5RU0He3gRxsboi5FREaZgmOIdrUcY9/h42nZ2HAwLi+fxMVz4i3XO7u6oy5HREaRgmOIqurixzeWZmhwmBl3Ly/nnZZ2fr5lX9TliMgoUnAMUVVtMyXjclg4rSjqUiJzw6LplE8tZM2Gt9VyXSSDKDiGqLruIEvLJpKVlXnHN3r0tFx/fc9hnt3RFHU5IjJKFBxDcODIcWqb2jLuNNz+3HrJLEqL89T8UCSDKDiGoLo23lo8k64YP5O87Bh/fNV8ntvRzOaGQ1GXIyKjQMExBFW1zYzLiXHhrJKoS0kJH7tsLkX52Zp1iGQIBccQVNUd5NJ5E8mJ6dcHUJSfwycun8fPt+yjtqkt6nJEJGT6y5ek1vaTbNt3WLup+vjUVfPJUct1kYyg4EhSTX0L7pnXn2ogU4vyuO3S2fx4424OHFbLdZF0puBIUlVdCzkx45K5E6IuJeWsvLqczu5uHniuLupSRCRECo4kVdW2cNHsCeTnxKIuJeWUTSnkpqDl+uHjarkukq4UHEk41tHFaw2tOr5xFncvr+DIiU6+/+I7UZciIiFRcCThlXcO0tntGdvYcDAunFXC1Qum8IBaroukLQVHEqrqWjCDJfMmRl1KSlu1vILGIyd47JXdUZciIiFQcCShqraF86cXUzIuJ+pSUtqVFZN5z6wS1j69k65uNT8USTcKjkHq6Ozm5XcO6jTcQTAzVi2voLapjV++rpbrIulGwTFIW/a0cvxkt4JjkG68cDplkwvUcl0kDSk4Bqm6Nrhxk86oGpRYlrHymgo2N7TywtvNUZcjIiNoUMFhZlPCLiTVVdW2UD6lkKlFeVGXMmZ8aMkspozPY7WaH4qklbMGh5n9oZk1Aq+ZWYOZXTlKdaWU7m6nuq5Fu6mSlJ8T44/fW8YzbzWxZXdr1OWIyAgZaMbx98DV7j4D+C/APybz5WZ2o5ltN7MdZnZvP+vNzL4RrN9sZksS1tWZ2WtmtsnMahKWTzKzJ83sreA59HNjt+8/wuHjndpNNQQfv3weRXlquS6STgYKjk533wbg7i8Bg77BtpnFgG8CNwGLgI+a2aI+m90ELAgeK4HVfdZf5+6L3b0yYdm9wG/cfQHwm+B9qKqC4xuacSSvOD+Hj10+lyde20t9s1qui6SDgYJjmpn9ec+jn/dnswzY4e473b0DeARY0WebFcCDHvciMMHMZgzwvSuAdcHrdcCtA2w/bFV1LcwoyWf2xHFh/6i09Omr5pOdlcV9z+yMuhQRGQEDBcd9xGcZPY/E9+MH+OwsYFfC+4Zg2WC3ceBXZrbRzFYmbFPq7nsBgudp/f1wM1tpZjVmVtPY2DhAqWfm7lTVxo9vmNmQvyeTTSvO50NLZvFoTQONR05EXY6IDFP22Va6+5fPtM7Mlg7w3f39le17Qv/ZtrnK3feY2TTgSTPb5u5PD/AzT32J+1pgLUBlZeWQLySob26n8cgJHd8YppXXlPODml189/la/tsfnBd1OSIyDEldx2Fmi8zsf5vZW5x+PKKvBmBOwvvZwJ7BbuPuPc8HgMeI7/oC2N+zOyt4PpDMGJLVc3xDjQ2Hp3zqeG68YDrfe6Geoyc6oy5HRIZhwOAws3lmdq+ZvQp8D/gccH2fA9b9qQYWmNl8M8sFbgfW99lmPXBHcHbV5UCru+81s0IzKwp+fiFwA7Al4TN3Bq/vBB4feJhDV1XXwsSCHM6ZNtCeORnIquUVHD7eycMvqeW6yFg20HUczwNPADnAbe5+KXDE3esG+mJ37wTuAX4JbAUedffXzWyVma0KNnsC2AnsIH785HPB8lLg2SCsqoCfufsvgnVfAa4PZj3XB+9DU1XbwtIyHd8YCRfPmcAV5ZO5/9mdnOhUy3WRseqsxziARuK7j0qBqcBbnH6c4ozc/Qni4ZC4bE3Cawc+38/ndgIXn+E7m4HfH2wNw7Gv9TjvtLRzxxXzRuPHZYS7r63gjgeqePyVPXx46ZyBPyAiKeesMw53XwG8B3gZ+LKZ1QITzWzZ2T6XLqrqdP3GSLt6wRQumFnMmqffplst10XGpAGPcbh7q7s/4O7XA5cD/wv4mpntGuCjY15VbTOFuTEWzSiOupS0YWbctbyCnY1t/OqN/VGXIyJDkNRZVe6+392/4e5XAu8NqaaU8fHL5/HV2y4mO6YmwiPp/RdOZ+4ktVwXGavOeozDzPqeBdXXLSNYS8o5b3ox503XbGOkZcey+Ow15fyP/9jCS7UtXF4+OeqSRCQJAx0cv4L4ld0PAy/R/wV7Ikn7o0tn8/Vfv8maDW8rOETGmIH2wUwH/ga4EPg68dNfm9x9g7tvCLs4SV/5OTE+eWUZT21v5I09h6MuR0SSMNBZVV3u/gt3v5P4gfEdwFNm9iejUp2ktU9cXkZhboxvPa2W6yJjyWCuHM8zsw8BDxG/5uIbwE/CLkzSX0lBDh+7bC4/3byXXS3tUZcjIoM00JXj64DngSXAl919qbv/nbvvHpXqJO19+r3lZBncr5brImPGQDOOTwALgS8Az5vZ4eBxxMy0Y1qGbXpJPrcunsUPanbRfFQt10XGgoGOcWS5e1HwKE54FLm7zlOVEXHX8nJOdHaz7vm6qEsRkUHQlW0SuXOmFXH9+aWse6GeNrVcF0l5Cg5JCauuraD12EkeqU77TjYiY56CQ1LCkrkTWTZ/Evc/s5OOzu6oyxGRs1BwSMq4+9oK9rYeZ/2rfW8UKSKpRMEhKePahVM5b3oR39qglusiqUzBISnDzFi1vIK3Dhzlt9tCvZW8iAyDgkNSygcumsHsieNYvUFtSERSlYJDUkp2LIvPXl3OxvqDVAd3YBSR1KLgkJTz4co5TCrMZc1TmnWIpCIFh6Sccbkx7ryijN9sO8D2fUeiLkdE+lBwSEq644p5FOTG+JaOdYikHAWHpKSJhbncvnQu61/dw+5Dx6IuR0QSKDgkZX3m6vmAWq6LpBoFh6SsmRPGccvimTxStYuDbR1RlyMiAQWHpLRVyys4drKLdS/URV2KiAQUHJLSFpYW8b7zp7Hu+TraO9RyXSQVKDgk5a1aXsHB9pM8qpbrIilBwSEpr7JsEpXzJnLfM7Wc7FLLdZGoKThkTLj72gp2HzrGTzer5bpI1BQcMiZcd+40FpaO51sbduKulusiUVJwyJiQlWXcdU0F2/Yd4antjVGXI5LRFBwyZtyyeCYzS/JZreaHIpFScMiYkRPL4jNXl1NV18LG+oNRlyOSsRQcMqbcvmwOEwpyWKPmhyKRCTU4zOxGM9tuZjvM7N5+1puZfSNYv9nMlvRZHzOzV8zspwnL/tbMdpvZpuDx/jDHIKmlIDebO64o48k39rPjgFqui0QhtOAwsxjwTeAmYBHwUTNb1Gezm4AFwWMlsLrP+i8AW/v5+n9z98XB44mRrVxS3SevLCM/J4s1G9T8UCQKYc44lgE73H2nu3cAjwAr+myzAnjQ414EJpjZDAAzmw3cDNwfYo0yBk0KWq4/vmk3e1vVcl1ktIUZHLOAxB4RDcGywW7zNeCLQH+XCt8T7Np6wMwm9vfDzWylmdWYWU1jo07fTDeffu98uh2+/Uxt1KWIZJwwg8P6Wdb3yq1+tzGzDwAH3H1jP+tXAxXAYmAv8C/9/XB3X+vule5eOXXq1CTKlrFgzqQC/vCiGTxc9Q6H2tVyXWQ0hRkcDcCchPezgb79Is60zVXALWZWR3wX1++Z2UMA7r7f3bvcvRu4j/guMclAq66toK2ji++9UB91KSIZJczgqAYWmNl8M8sFbgfW99lmPXBHcHbV5UCru+91979299nuXhZ87rfu/nGAnmMggQ8CW0Icg6Sw86YXc925U/nu83UcP9kVdTkiGSO04HD3TuAe4JfEz4x61N1fN7NVZrYq2OwJYCewg/js4XOD+OqvmtlrZrYZuA74s5GvXsaKVcsraG7r4Ic1arkuMlosExrGVVZWek1NTdRlSAjcnQ+tfp6moyf43V9cS3ZM17SKjBQz2+julX2X61+ZjGlmxt3LK9jVcoyfvbY36nJEMoKCQ8a8951fyjnTxrNGLddFRoWCQ8a8rCxj5TXlbN17mKffaoq6HJG0p+CQtHDr4llML85n9VM7oi5FJO0pOCQt5GZn8Zmr5/PizhY27ToUdTkiaU3BIWnj9mVzKc7PZo1u9CQSKgWHpI3xefGW6798Yx9vNx6NuhyRtKXgkLTyyavKyI1lsVYt10VCo+CQtDJlfB4frpzDY6/sZv/h41GXI5KWFBySdj57dTmd3d088KxarouEQcEhaWfu5AJuvmgm33/pHVqPnYy6HJG0o+CQtLRqeTlHT3Ty0ItquS4y0hQckpYumFnCNQun8p3n1HJdZKQpOCRtrVpeTtPRE/z45YaoSxFJKwoOSVtXlE/m4tklrH16J13dan4oMlIUHJK2zIy7r62gvrmdn29Ry3WRkaLgkLR2/aLplE8pZM2Gt9VyXWSEKDgkrcWClutbdh/muR3NUZcjkhYUHJL2PrhkFtOK8lizQc0PRUaCgkPSXl52jE+/dz7P7mjitYbWqMsRGfMUHJIRPnbZXIryszXrEBkBCg7JCEX5OXz88nn8fMte6praoi5HZExTcEjG+NRVZWTHslj7jFquiwyHgkMyxrSifG67dDY/2tjAgSNquS4yVAoOySgrry6ns6ub1U+9rR5WIkOUHXUBIqOpbEohN180k+88V8d3nqtjRkk+8yYXUDa5kLIphZRNLmDe5ELmTS6gIFf/PET6o38ZknH++baLuH5RKfVNbdQ1t1PX3Mavt+6n6WhHr+2mFeX1CpN4uMRfj8/TPx3JXPqvXzJOfk6MWy6eedryI8dPUh8ESX1zO3VN8eentjdy4EjvDrtTxue9Gyjzp5wKlnlTCijOzxmtoYhEQsEhEijKz+HCWSVcOKvktHVtJzqpb26nvjmYpTS1UdfcxnM7mvjxy70PtE8uzH1399e8YJZSFgRLSYFCRcY+BYfIIBTmZbNoZjGLZhaftu5YRxf1LW3UNfUOlhd3NvOTV3b32nZCQU58ltKz+yuYrcyfXMiEghzMbLSGJDJkCg6RYRqXG+O86cWcN/30UDl+sotdLe29Zin1ze3U1B/k8Vf3kNiwtzg/m7IpwSzl3QP28WCZXJirUJGUoeAQCVF+TowFpUUsKC06bd2Jzi52tRx7d5ZS39xGbVMbr+46xM827yHx3lNFednMe/dYSs+xlfjZX1PH5ylUZFQpOEQikpcd45xp4zln2vjT1nV0drP70LFes5S65jZe393KL7bs63VHw4Lc2KlZSp+zwEqLFSoy8hQcIikoNzuL+VPis4q+TnZ1s+fQsdN2f23ff4Rfb93Pya5ToZKfkxUcpD/9YP304nyyshQqkjwFh8gYkxPLCi5SLGT5wqm91nV2dbO39Th1Pbu/gmB5u7GN321rpKOr+91tc7OzmDfp9FlK2ZQCZpSMI6ZQkTMINTjM7Ebg60AMuN/dv9JnvQXr3w+0A59095cT1seAGmC3u38gWDYJ+AFQBtQBH3b3g2GOQ2SsyI5lMWdSAXMmFXD1gt7rurqdfYeP9979Fbx++s1GTnQmhEosizmTxr07S0m8VmXmhHyyY+pWlMlCC47gj/43geuBBqDazNa7+xsJm90ELAgelwGrg+ceXwC2Aomnq9wL/Mbdv2Jm9wbv/yqscYiki1iWMWvCOGZNGMdV50zpta6729l/5Dh1Te3BbKWN+uD18283cyyhr1dOzJgzsYB5kxMO1k+Jn1I8a+I4chQqaS/MGccyYIe77wQws0eAFUBicKwAHnR3B140swlmNsPd95rZbOBm4O+BP+/zmWuD1+uAp1BwiAxLVpYxo2QcM0rGcUXF5F7r3J0DR068eyV9T7DUNbVTVdtCW8epUIllGbMnjuv3WpU5EwvIzVaopIMwg2MWsCvhfQO9ZxNn2mYWsBf4GvBFoO95jKXuvhcgCJhp/f1wM1sJrASYO3fuEIcgImZGaXE+pcX5XFZ+eqg0He0IgqR3sLxSf5AjJzrf3TbLYNbEcb0O1vccU5k9sYD8nNhoD02GKMzg6O/Img9mGzP7AHDA3Tea2bVD+eHuvhZYC1BZWdn354rICDAzphblMbUoj6Vlk3qtc3da2jrevUalrunU9SrrN+3h8PHOhO+BmSXj3p2dJF6rMneSQiXVhBkcDcCchPezgT2D3OY24BYzez+QDxSb2UPu/nFgf8LurBnAgdBGICJDZmZMHp/H5PF5XDpv4mnrD7Z19LpGpb65ndqmNn7+2l4Otp/sta3a36eWMH/j1cACM5sP7AZuBz7WZ5v1wD3B8Y/LgNZgN9RfBw+CGcdfBqHR85k7ga8Ez4+HOAYRCcnEwlwmFuZyydzTQ6W1/ST1LfEr6ROD5ck39tPc1rv9fWlxXq9Zitrfhy+036q7d5rZPcAviZ+O+4C7v25mq4L1a4AniJ+Ku4P46bifGsRXfwV41Mw+DbwD/FEY9YtIdEoKcrioYAIXzZ5w2rrDx0/yTs+xlITdX7/b3kjjGdrf971WRe3vh8fc03/3f2VlpV6JrwMAAAajSURBVNfU1ERdhoiE7OiJTuoTd381tVPb3EZ9cxv7D5/ota3a3w/MzDa6e2Xf5ZrHiUjaGJ+XzQUzS7hg5un3VGnv6OSdlvZes5S6pnZeUPv7pCk4RCQjFORmn7X9/alQORUs1XVqf98fBYeIZLz8nBgLS4tYeMb29+3vXlXfsxts066DGdv+XsEhInIW8fb3RZwz7fRQ6ejspuFg+7tX0vfcW2VLmre/V3CIiAxRbnYW5VPHUz719HuqnOzqZvfBY72uUalvbmP7viM8+cZ+OrvHbvt7BYeISAhyYlnxmUU/91Tp7Opmz6HjQaicuk/9jgNHx0T7ewWHiMgoy45lMXdyAXMnFwC976nS1e3sbT2WcEzl1MH6DW820pEC7e8VHCIiKSTeYTje+PG9C05vf7/v8PHT7qdS39zOc283cfzkqVDpaX//Dx96D5f3aU45XAoOEZExIivLmDlhHDMnjOPKit7r3J39h0+cNkuZVJg74nUoOERE0oCZMb0kn+kl+SM+w+hLd1UREZGkKDhERCQpCg4REUmKgkNERJKi4BARkaQoOEREJCkKDhERSYqCQ0REkpIRt441s0agfoDNpgBNo1BOqtG4M4vGnXmGM/Z57j6178KMCI7BMLOa/u6tm+407syicWeeMMauXVUiIpIUBYeIiCRFwXHK2qgLiIjGnVk07swz4mPXMQ4REUmKZhwiIpIUBYeIiCQl44PDzG40s+1mtsPM7o26npFmZg+Y2QEz25KwbJKZPWlmbwXPExPW/XXwu9huZn8QTdXDY2ZzzOx3ZrbVzF43sy8Ey9N93PlmVmVmrwbj/nKwPK3H3cPMYmb2ipn9NHifKeOuM7PXzGyTmdUEy8Idu7tn7AOIAW8D5UAu8CqwKOq6RniM1wBLgC0Jy74K3Bu8vhf4p+D1ouB3kAfMD343sajHMIQxzwCWBK+LgDeDsaX7uA0YH7zOAV4CLk/3cSeM/8+B/wf8NHifKeOuA6b0WRbq2DN9xrEM2OHuO929A3gEWBFxTSPK3Z8GWvosXgGsC16vA25NWP6Iu59w91pgB/Hf0Zji7nvd/eXg9RFgKzCL9B+3u/vR4G1O8HDSfNwAZjYbuBm4P2Fx2o/7LEIde6YHxyxgV8L7hmBZuit1970Q/yMLTAuWp93vw8zKgEuI/9932o872F2zCTgAPOnuGTFu4GvAF4HuhGWZMG6I/8/Br8xso5mtDJaFOvbsYRSbDqyfZZl8fnJa/T7MbDzwY+BP3f2wWX/Di2/az7IxOW537wIWm9kE4DEzu/Asm6fFuM3sA8ABd99oZtcO5iP9LBtz405wlbvvMbNpwJNmtu0s247I2DN9xtEAzEl4PxvYE1Eto2m/mc0ACJ4PBMvT5vdhZjnEQ+P77v6TYHHaj7uHux8CngJuJP3HfRVwi5nVEd/d/Htm9hDpP24A3H1P8HwAeIz4rqdQx57pwVENLDCz+WaWC9wOrI+4ptGwHrgzeH0n8HjC8tvNLM/M5gMLgKoI6hsWi08tvg1sdfd/TViV7uOeGsw0MLNxwPuAbaT5uN39r919truXEf83/Ft3/zhpPm4AMys0s6Ke18ANwBbCHnvUZwRE/QDeT/ysm7eBL0VdTwjjexjYC5wk/n8bnwYmA78B3gqeJyVs/6Xgd7EduCnq+oc45vcSn35vBjYFj/dnwLgvAl4Jxr0F+J/B8rQed5/fwbWcOqsq7cdN/IzQV4PH6z1/w8Ieu1qOiIhIUjJ9V5WIiCRJwSEiIklRcIiISFIUHCIikhQFh4iIJCXTrxwXGXFm1nMqJMB0oAtoDN4v83hftIG+42+Bo+7+f80sH/hP4Fl3/3IIJYskRcEhMsLcvRlYDL0DYCjfFVyY+mNgo0JDUoV2VYmMAjP7/eBeEa8F90jJC5bXmdk/BffRqDKzcxI+lk28hcZb7p5294qRsUvBIRK+fOC7wEfc/T3EA+HuhPWH3X0Z8O/Eu7z2+CLQ6e5/OlqFigyGgkMkfDGg1t3fDN6vI36DrR4PJzxfkbD8WeAKM1sYfokig6fgEAlf2wDr/Qyvnwb+FPi5mc0c8apEhkjBIRK+fKAs4fjFJ4ANCes/kvD8QuIH3f3HwD8Dv+jpfCsSNZ1VJRK+48CngB+aWTbxdv5rEtbnmdlLxP9H7qN9P+zua8xsOrDezG5w9+OjUbTImag7rkiEgpsPVbp7U9S1iAyWdlWJiEhSNOMQEZGkaMYhIiJJUXCIiEhSFBwiIpIUBYeIiCRFwSEiIkn5//ianfar4dfiAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pyplot.plot(x_tick, MAP_per_k)\n",
    "pyplot.ylabel('MAP')\n",
    "pyplot.xlabel('TopK')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### On this dataset the number of neighbors does not have a great impact on MAP. Higher values of TopK might work even better\n",
    "\n",
    "#### Different datasets will behave in different ways."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Shrinkage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 14254.25 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 53000 ( 75.93% ) in 30.21 sec. Users per second: 1754\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 40.49 sec. Users per second: 1724\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 12536.15 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 51000 ( 73.06% ) in 30.21 sec. Users per second: 1688\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 40.49 sec. Users per second: 1724\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 13383.70 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 51000 ( 73.06% ) in 30.47 sec. Users per second: 1674\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 41.39 sec. Users per second: 1686\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 12519.38 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 49000 ( 70.20% ) in 30.04 sec. Users per second: 1631\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 43.95 sec. Users per second: 1588\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 12818.53 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 50000 ( 71.63% ) in 30.50 sec. Users per second: 1640\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 42.88 sec. Users per second: 1628\n",
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 12986.04 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 42000 ( 60.17% ) in 30.27 sec. Users per second: 1388\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 50.30 sec. Users per second: 1388\n"
     ]
    }
   ],
   "source": [
    "x_tick = [0, 10, 50, 100, 200, 500]\n",
    "MAP_per_shrinkage = []\n",
    "\n",
    "for shrink in x_tick:\n",
    "    \n",
    "    recommender = ItemKNNCBFRecommender(URM_train, ICM_all)\n",
    "    recommender.fit(shrink=shrink, topK=100)\n",
    "    \n",
    "    result_dict, _ = evaluator_test.evaluateRecommender(recommender)\n",
    "    \n",
    "    MAP_per_shrinkage.append(result_dict[10][\"MAP\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEGCAYAAABy53LJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAeRUlEQVR4nO3de5ScdZ3n8fenq7tzkUtAEggkGNQIiwIhNhHUcb0LjBqvO8EzEhj2IKvo6JzdEY9zwbO742WdUTnLwKJGgzqgw3jJjqyKrMyeUWHSQIBwk4BAGiI0COGSqq7bd/94ft15utPpqup0pS/1eZ1Tp576Pb+n6vcryPPp3/N76nkUEZiZmTWra7obYGZms4uDw8zMWuLgMDOzljg4zMysJQ4OMzNrSfd0N2B/OOyww2LFihXT3Qwzs1nl5ptvfiIiFo8t74jgWLFiBf39/dPdDDOzWUXSQ+OV+1CVmZm1xMFhZmYtcXCYmVlLHBxmZtYSB4eZmbXEwWFmZi1xcJiZWUs64ncc1li5Wufe3z3LloGneer5Mgt7C8zvKbCwN3ss6O3OnkfKulnQU2BBb4Hebv/9YdZJHBwdKCJ46Mld3DbwNFu2P81t259m66PPUK7WJ/V+3V1iQe/oQMnCppBb7s6FUIGFPVnd+SPLqXw4oFLd+d0Furo0xd+Ame0LB0cHeOK5IW5LAbFlYCe3bX+ancUKAAt6Cpxw1MGsP+1FrFp+CCctP5gjDppPsVKjWK6xKz2KlWr2XK5RrOTKy9Xcco1dlaysWKnx3FCVwWeHRtet1Gj13mHze7pGBVI+ZIZHPbsDaXTw7F4/unxhTzcLegv0FITkYDJrhYNjjtlVrrL1kWdSSGRhMfBUEYAuwbFHHMSZJxzBScsWcdLyRaxccgDdhT0PNR1Y6OLA+T1T3r6IYKhaT2E0TvCUa+xKwTM6qKqj6hTLNR5/tsSuco1SCqxd5VrLo6ZCl1jYU2BeTxc9hezRXRC96Xm4rCctd3d10ds9erm7K9XpFj1do9+jpyC6C117f7+RevnP3b2+Z8xywaMvmwEcHLNYtVbnvsefGznctGX70/zmsWepp7/olx2ygJOWL2L9aStYdfQiXn7kQSzsnd7/5JKY35PNn7RDtVYfd7RULNdHBdLY0VK5VqdSrVOtB+VanWqtTqUWVGp1KrU6pUqd50pVyqls7Pr8cr2Nd2OWoGdsEHWJnu4UPl2itzt77il0jVoeHVhd9KZQ6xmznA+qsSE6OgDHf9+eQgrQ7t3LPtw4tzg4ZomIYOCpIrelUcRt23dyxyM7KVZqACxa2MNJyxbx1pcfwarlB3PiskUcdsC8aW71/tdd6GrbaKlZtfruEKkOB0o9qFRHh0y1XqdcHb1crac61aBSHx1mlbS+nH/f/PvVRodeuVbn+aHqHgFXrdUp19JnVXfXbadCl7JQK2SBsjvMxh+F7RlSe9ZtFHq7R4lZsPbmlrNgGw7ebLm7a89g9GHM8Tk4ZqiduypsGXiaLQ8/PRIWTz5fBqC3u4tXHHkQ69YsZ9XyRZy0bBEveuFC/08+QxS6RKGrfaOqdogIavXIhVQKrPRcqdVHLY8EVjWFXgqkSm30cj7UsnX50Bsn/OpZQD5bqY4sV3JhmgVsvh1tHN7BSMCNf0hx7yO5PUdfY0KvKxsNDi9nYZbq5JZHPncvI8d8iA63qbur/YHn4JiBfnX/E5z7jc0MVetI8NLFB/CG45awavkiVi1fxMsOP9CnwNqUkpTtdArMusDLh0h+pFfea2DtGVpjg250mGUButcRZG75+XJtJEz3diizWsva3E75YLv0A6t57crDpvT9HRwzzP2Dz3HBt27m6EMX8pl3vpwTlh08rYddzGYySSN/fc8m9Xo2kppw9FUbf76tUts98sovV8cJs2o9OOLgqT9k7eCYQX7/fJk/+eZmeru72HDOKSw/dOF0N8nM2qCrS8zrKjBvlu6BZ2mz556hao3zr+zndztLXHX+qQ4NM5uxHBwzQETwyWtup/+hp/ifHziZ1UcfMt1NMjPbq9l1YHCO+sr19/HDLY/yX952LG8/8cjpbo6Z2YQcHNPsh7c+wpd/fh/vXb2MD7/+JdPdHDOzhhwc06j/wd/z59fczquOOZTPvucE/w7DzGYFB8c0eejJ5zn/Wzdz1CEL+F8ffKV/l2Fms4b3VtNg564K535zM/UINpxzCosW9k53k8zMmtbW4JB0uqR7JW2TdNE46yXpkrT+dkmrc+s+IelOSVslXSVpfio/VNJ1ku5Lz7PqFKRytc4F376Z7b/fxRUf7OOYw14w3U0yM2tJ24JDUgG4FDgDOB44S9LxY6qdAaxMj/OBy9K2RwEfA/oi4hVAAViXtrkIuD4iVgLXp9ezQkTwFz+8g18/8CSff++JrDnm0OlukplZy9o54lgDbIuIByKiDFwNrB1TZy1wZWRuBBZJWprWdQMLJHUDC4FHc9tsTMsbgXe1sQ9T6vJ/eYDv9Q/wsTe+lPesXjbdzTEzm5R2BsdRwPbc64FU1rBORDwCfBF4GNgB7IyIn6U6h0fEDoD0vKQNbZ9y196xg8//5B7ecdKRfOItL5vu5piZTVo7g2O8c0vHXhJy3Dpp3mItcAxwJPACSX/c0odL50vql9Q/ODjYyqZTbsv2p/nEd7ew+uhF/I/3nejTbs1sVmtncAwAy3Ovl7H7cFOjOm8GfhsRgxFRAb4PvDrVeWz4cFZ6fny8D4+IKyKiLyL6Fi9evM+dmayBp3bxHzf2s+SgeXz17L5ZdclqM7PxtDM4NgMrJR0jqZdscnvTmDqbgLPT2VWnkh2S2kF2iOpUSQuV/Xn+JuDu3Dbr0/J64Edt7MM+eaZU4bxv9jNUrfGNc07hhR14Rz4zm3vadpHDiKhKuhD4KdlZURsi4k5JF6T1lwPXAmcC24BdwLlp3U2SrgFuAarArcAV6a0/B3xP0nlkAfP+dvVhX1RrdS78h1u5f/A5Nv7JGl665MDpbpKZ2ZRQRHvvRDUT9PX1RX9//377vIjgL3+0lW/f+DCffc8JnLXm6P322WZmU0XSzRHRN7bcvxxvgw2/fJBv3/gwH3rdix0aZjbnODim2M/veoz/9uO7OP3lR/DJ04+b7uaYmU05B8cU2vrITj529a2ccNTBfOmPVtHV5dNuzWzucXBMkd/tLHHexs0cvKCHr53dx4Jen3ZrZnOTg2MKPD9U5byNm3muVGXDOaew5KD5090kM7O28T3H91GtHvzp1Vu4e8czfH39Kfy7pQdNd5PMzNrKI4599DfX3s3P736Mv37Hy3nDcbPisllmZvvEwbEPvnXjQ3z9X3/LOa9ewfpXr5ju5piZ7RcOjkn6l98McvGmO3njcUv4y7ePvc2Imdnc5eCYhG2PP8eF37mFlUsO4JKzTqbg027NrIM4OCbhf9/2KM8OZWdQHTDP5xeYWWdxcExCsVJjQU+BIxctmO6mmJntdw6OSSiWa8zv8VdnZp3Je79JKKURh5lZJ3JwTEKxUvOd/MysYzk4JqFUqTs4zKxjOTgmoVSp+SKGZtaxHByTkB2q8ldnZp3Je79J8OS4mXUyB8ckFCs15jk4zKxDOTgmYahS94jDzDqWg2MSPMdhZp3Me79JKJY9x2FmncvB0aKIoFR1cJhZ53JwtGioWicCT46bWcdycLRoqFIH8IjDzDqWg6NFxUoNwJccMbOO5eBoUSkFx4Jef3Vm1pm892vR8IjDh6rMrFM5OFo0HByeHDezTtXW4JB0uqR7JW2TdNE46yXpkrT+dkmrU/mxkrbkHs9I+nhad7GkR3LrzmxnH8YqecRhZh2uu11vLKkAXAq8BRgANkvaFBF35aqdAaxMj1cBlwGvioh7gVW593kE+EFuuy9FxBfb1faJlDw5bmYdrp0jjjXAtoh4ICLKwNXA2jF11gJXRuZGYJGkpWPqvAm4PyIeamNbm1by6bhm1uHaGRxHAdtzrwdSWat11gFXjSm7MB3a2iDpkPE+XNL5kvol9Q8ODrbe+r0olodHHJ4eMrPO1M69n8Ypi1bqSOoF3gn8Y279ZcBLyA5l7QD+drwPj4grIqIvIvoWL17cSrsn5LOqzKzTtTM4BoDludfLgEdbrHMGcEtEPDZcEBGPRUQtIurAV8kOie03I3McvnWsmXWodgbHZmClpGPSyGEdsGlMnU3A2ensqlOBnRGxI7f+LMYcphozB/JuYOvUN33vRoKj28FhZp2pbWdVRURV0oXAT4ECsCEi7pR0QVp/OXAtcCawDdgFnDu8vaSFZGdkfWjMW39B0iqyQ1oPjrO+rUqVOoUu0VMY7yibmdnc17bgAIiIa8nCIV92eW45gI/sZdtdwAvHKf/gFDezJcVKjfndXUgODjPrTD41qEWlSo0Fnt8wsw7m4GhRsVJjnuc3zKyDOTha5BGHmXU6B0eLSpW6f8NhZh3NwdGiYrnmX42bWUfzHrBFpWrNFzg0s47m4GhRNuJwcJhZ53JwtKhUqXmOw8w6moOjRZ4cN7NO5+BoUbHiyXEz62zeA7aoVKn5yrhm1tEcHC2o14Ohat1XxjWzjubgaMFQNd021iMOM+tgDo4WFEfuxeGvzcw6l/eALRi5baxHHGbWwRwcLRi5+59PxzWzDubgaEGx7OAwM3NwtGComg5VOTjMrIM5OFpQLGdnVXnEYWadzMHRguE5Do84zKyTOThaMHI6ri85YmYdzHvAFhR9VpWZmYOjFUP+HYeZmYOjFR5xmJk5OFpSqqSzqnzJETPrYN4DtqBYqdFTEN0Ff21m1rm8B2yB7zduZtZkcEg6rN0NmQ2Gqr7fuJnZhMEh6R2SBoE7JA1IevV+ateM5BGHmVnjEcd/B/4gIpYC7wU+28qbSzpd0r2Stkm6aJz1knRJWn+7pNWp/FhJW3KPZyR9PK07VNJ1ku5Lz4e00qZ9UarUPeIws47XKDiqEXEPQETcBBzY7BtLKgCXAmcAxwNnSTp+TLUzgJXpcT5wWfqseyNiVUSsAl4J7AJ+kLa5CLg+IlYC16fX+0WxUvOvxs2s43U3WL9E0p/t7XVE/N0E264BtkXEAwCSrgbWAnfl6qwFroyIAG6UtEjS0ojYkavzJuD+iHgot83r0/JG4Abgkw36MSVKFR+qMjNr9OfzV8lGGcOP/OsDGmx7FLA993oglbVaZx1wVe714cPBkp6XjPfhks6X1C+pf3BwsEFTm+PgMDNrMOKIiM/sbZ2kUxq8t8Z7y1bqSOoF3gl8qsFn7fkmEVcAVwD09fWN/dxJKVZqLHVwmFmHa3SoapQ0R7EOOAvYCfRNUH0AWJ57vQx4tMU6ZwC3RMRjubLHhg9nSVoKPN5KH/ZFqVL3darMrOM1nOmV9CJJF0m6DfgW8GHgLRExUWgAbAZWSjomjRzWAZvG1NkEnJ3OrjoV2DlmfuMsRh+mGt5mfVpeD/yoUR+miifHzcwajDgk/Qo4GLgaeF9E3CfptxHxYKM3joiqpAuBnwIFYENE3CnpgrT+cuBa4ExgG9mZU+fmPnsh8BbgQ2Pe+nPA9ySdBzwMvL+Zjk4Fz3GYmTU+VDVIdvjocGAxcB97zlPsVURcSxYO+bLLc8sBfGQv2+4CXjhO+ZNkZ1rtdw4OM7MGh6oiYi1wAnAL8BlJvwUOkbRmfzRuJqnW6lRq4R8AmlnHazg5HhE7gQ3ABkmHA38EfFnS8ohYPvHWc0epmi6p7jkOM+twLe0FI+KxiLgkIl4NvLZNbZqRiuV09z+POMyswzWaHB97FtRY75zCtsxoJd/9z8wMaHyo6jSyX3ZfBdzE+D/Y6wgODjOzTKPgOILslNizgA8APwauiog7292wmWb4trE+VGVmna7RWVW1iPhJRKwHTiX7vcUNkj66X1o3gxQ94jAzA5o4q0rSPOAPyUYdK4BLgO+3t1kzz3BwLOj1WVVm1tkaTY5vBF4B/B/gMxGxdb+0agYanuOY1+0Rh5l1tkYjjg8CzwMvAz4mjcyNi+yH3we1sW0zSmlkxOHgMLPO1uiy6j4uk4wEh+c4zKzDORiaNPwDQE+Om1mnc3A0qejTcc3MAAdH03ZPjvsrM7PO5r1gk0qVGvO6u+jq6tgfz5uZAQ6OppUqNZ9RZWaGg6NpxUqN+f4Nh5mZg6NZpUrdIw4zMxwcTSumOQ4zs07nPWGTPMdhZpZxcDSp5DkOMzPAwdG0okccZmaAg6NppUrdvxo3M8PB0bRiuca8Hn9dZmbeEzapVKl5xGFmhoOjaaVKzVfGNTPDwdGUiMgmxx0cZmYOjmZUakE9YL7nOMzMHBzNKFZ8Eyczs2FtDQ5Jp0u6V9I2SReNs16SLknrb5e0OrdukaRrJN0j6W5Jp6XyiyU9ImlLepzZzj4ADPl+42ZmIya85/i+kFQALgXeAgwAmyVtioi7ctXOAFamx6uAy9IzwFeAn0TE+yT1Agtz230pIr7YrraPNTLi8C/HzczaOuJYA2yLiAciogxcDawdU2ctcGVkbgQWSVoq6SDgdcDXASKiHBFPt7GtEyp6xGFmNqKdwXEUsD33eiCVNVPnxcAg8A1Jt0r6mqQX5OpdmA5tbZB0yHgfLul8Sf2S+gcHB/epI6V0v3FPjpuZtTc4xrvHajRZpxtYDVwWEScDzwPDcySXAS8BVgE7gL8d78Mj4oqI6IuIvsWLF0+i+bsVy54cNzMb1s7gGACW514vAx5tss4AMBARN6Xya8iChIh4LCJqEVEHvkp2SKytStV0qMrBYWbW1uDYDKyUdEya3F4HbBpTZxNwdjq76lRgZ0TsiIjfAdslHZvqvQm4C0DS0tz27wa2trEPAJQ84jAzG9G2s6oioirpQuCnQAHYEBF3Srogrb8cuBY4E9gG7ALOzb3FR4HvpNB5ILfuC5JWkR3SehD4ULv6MMwjDjOz3doWHAARcS1ZOOTLLs8tB/CRvWy7Begbp/yDU9zMhorl4clxB4eZmU8TasLI6bgODjMzB0czSik4fD8OMzMHR1NKlRoSzOv212Vm5j1hE4Zv4iSN97MTM7PO4uBoQtE3cTIzG+HgaEKxXPfEuJlZ4uBoQqla88S4mVnivWETSmXfNtbMbJiDowmlquc4zMyGOTiaUPSIw8xshIOjCaVK3SMOM7PEwdGEUqXmmziZmSXeGzahWPGhKjOzYQ6OJpT8A0AzsxEOjiYUKzUW9Do4zMzAwdFQRHhy3Mwsx8HRwFB1+CZO/qrMzMDB0VCx7Js4mZnlOTgaGL7fuA9VmZllHBwNeMRhZjaag6OBUsVzHGZmed4bNlCs+FCVmVmeg6OBoYoPVZmZ5Tk4GvCIw8xsNAdHA8PB4V+Om5llHBwNjEyOdzs4zMzAwdHQyKGqXn9VZmbg4GhoyHMcZmajODga8A8AzcxGa2twSDpd0r2Stkm6aJz1knRJWn+7pNW5dYskXSPpHkl3SzotlR8q6TpJ96XnQ9rZh2KlRneX6Ck4Y83MoI3BIakAXAqcARwPnCXp+DHVzgBWpsf5wGW5dV8BfhIRxwEnAXen8ouA6yNiJXB9et02vqS6mdlo7fwzeg2wLSIeiIgycDWwdkydtcCVkbkRWCRpqaSDgNcBXweIiHJEPJ3bZmNa3gi8q419oOi7/5mZjdLO4DgK2J57PZDKmqnzYmAQ+IakWyV9TdILUp3DI2IHQHpeMt6HSzpfUr+k/sHBwUl3YqhS83WqzMxy2rlH1Dhl0WSdbmA1cFlEnAw8T4uHpCLiiojoi4i+xYsXt7LpKMVKzRPjZmY57QyOAWB57vUy4NEm6wwAAxFxUyq/hixIAB6TtBQgPT8+xe0epeT7jZuZjdLO4NgMrJR0jKReYB2waUydTcDZ6eyqU4GdEbEjIn4HbJd0bKr3JuCu3Dbr0/J64Edt7EM2x+FfjZuZjehu1xtHRFXShcBPgQKwISLulHRBWn85cC1wJrAN2AWcm3uLjwLfSaHzQG7d54DvSToPeBh4f7v6AFCs1Dl4QU87P8LMbFZpW3AARMS1ZOGQL7s8txzAR/ay7Ragb5zyJ8lGIPvFUKXG/APn7a+PMzOb8Xy6UANFz3GYmY3i4Gig5DkOM7NRHBwNFMsecZiZ5Tk4GvAlR8zMRnNwTKBWD8q1un85bmaW4z3iBEoVX1LdzGwsB8cESr6Jk5nZHhwcEyh6xGFmtgcHxwRKlToA8zzHYWY2wnvECXiOw8xsTw6OCYwcqvLvOMzMRjg4JuDJcTOzPTk4JlAs+1CVmdlYDo4JlKrZ5Lh/AGhmtpv3iBMolX2oysxsLAfHBEpVH6oyMxvLwTGBokccZmZ7cHBMoOizqszM9uDgmECpUqe30EWhS9PdFDOzGcPBMYFSpeYzqszMxvBecQLHHXEgp7/iiOluhpnZjNI93Q2YydatOZp1a46e7maYmc0oHnGYmVlLHBxmZtYSB4eZmbXEwWFmZi1xcJiZWUscHGZm1hIHh5mZtcTBYWZmLVFETHcb2k7SIPDQJDc/DHhiCpszG7jPncF97gz70ucXRcTisYUdERz7QlJ/RPRNdzv2J/e5M7jPnaEdffahKjMza4mDw8zMWuLgaOyK6W7ANHCfO4P73BmmvM+e4zAzs5Z4xGFmZi1xcJiZWUscHBOQdLqkeyVtk3TRdLdnqkjaIOlxSVtzZYdKuk7Sfen5kNy6T6Xv4F5Jb5ueVk+epOWSfiHpbkl3SvrTVD6X+zxf0r9Jui31+TOpfM72eZikgqRbJf1zej2n+yzpQUl3SNoiqT+VtbfPEeHHOA+gANwPvBjoBW4Djp/udk1R314HrAa25sq+AFyUli8CPp+Wj099nwcck76TwnT3ocX+LgVWp+UDgd+kfs3lPgs4IC33ADcBp87lPuf6/mfAPwD/nF7P6T4DDwKHjSlra5894ti7NcC2iHggIsrA1cDaaW7TlIiI/wf8fkzxWmBjWt4IvCtXfnVEDEXEb4FtZN/NrBEROyLilrT8LHA3cBRzu88REc+llz3pEczhPgNIWgb8IfC1XPGc7vNetLXPDo69OwrYnns9kMrmqsMjYgdkO1pgSSqfU9+DpBXAyWR/gc/pPqdDNluAx4HrImLO9xn4MvDnQD1XNtf7HMDPJN0s6fxU1tY+d+9DY+c6jVPWiecuz5nvQdIBwD8BH4+IZ6TxupZVHads1vU5ImrAKkmLgB9IesUE1Wd9nyW9HXg8Im6W9PpmNhmnbFb1OXlNRDwqaQlwnaR7Jqg7JX32iGPvBoDludfLgEenqS37w2OSlgKk58dT+Zz4HiT1kIXGdyLi+6l4Tvd5WEQ8DdwAnM7c7vNrgHdKepDs0PIbJX2bud1nIuLR9Pw48AOyQ09t7bODY+82AyslHSOpF1gHbJrmNrXTJmB9Wl4P/ChXvk7SPEnHACuBf5uG9k2asqHF14G7I+Lvcqvmcp8Xp5EGkhYAbwbuYQ73OSI+FRHLImIF2b/X/xsRf8wc7rOkF0g6cHgZeCuwlXb3ebrPCJjJD+BMsjNw7gc+Pd3tmcJ+XQXsACpkf4GcB7wQuB64Lz0fmqv/6fQd3AucMd3tn0R/X0s2HL8d2JIeZ87xPp8I3Jr6vBX4q1Q+Z/s8pv+vZ/dZVXO2z2Rnfd6WHncO76fa3WdfcsTMzFriQ1VmZtYSB4eZmbXEwWFmZi1xcJiZWUscHGZm1hIHh1kDkj6drjB7e7oC6avSFUkPa2LbPkmXNKizIn+lYrOZzpccMZuApNOAt5NdXXcohUVvk9t2R0Q/0N/ONprtbx5xmE1sKfBERAwBRMQTkS7xAHxU0i3pXgjHAUi6WNIVkn4GXCnp9bn7Qlys7F4oN0h6QNLHxn6YpBene0mcImmNpF+l17+SdGyqs1DS99II6LuSbpLUl9a9VdKvU7v+MV2fy2xKOTjMJvYzYLmk30j6e0n/PrfuiYhYDVwG/Odc+SuBtRHxgXHe7zjgbWTXE/rrdA0tAFIw/BNwbkRsJrtEyOsi4mTgr4C/SVU/DDwVEScC/zV9Hmk09BfAm1O7+snuTWE2pXyoymwCEfGcpFcCfwC8Afiudt8NcvhiiTcD78lttikiint5yx+n0cuQpMeBw1P5YrLrCb03Iu5MZQcDGyWtJLtkynDIvBb4SmrfVkm3p/JTyW7U88t05d9e4NeT6LbZhBwcZg1EdnnyG4AbJN3B7ovHDaXnGqP/LT0/wdsN5Zbz2+0ku0/Ca8iuOQTZaOIXEfHudB+RG1L53q4HL7L7bpw1weeb7TMfqjKbgKRj01/8w1YBD7Xho8pkd2k7W9LwIa6DgUfS8jm5uv8K/IfUvuOBE1L5jcBrJL00rVso6WVtaKt1OAeH2cQOIDtcdFc6JHQ8cHE7Piginic7g+sTktaS3Tf6s5J+CRRyVf8eWJza80myK+DujIhBsoC5Kq27kWxOxWxK+eq4ZrOMpALQExElSS8hu2z2yyKiPM1Nsw7hOQ6z2Wch8It0RpaA/+TQsP3JIw4zM2uJ5zjMzKwlDg4zM2uJg8PMzFri4DAzs5Y4OMzMrCX/H06O4v03NmGCAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pyplot.plot(x_tick, MAP_per_shrinkage)\n",
    "pyplot.ylabel('MAP')\n",
    "pyplot.xlabel('Shrinkage')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### The shrinkage value (i.e. support) have a much stronger impact. Combine a parameter search with the two to ensure maximum recommendation quality\n",
    "\n",
    "## Be careful, overfitting!\n",
    "\n",
    "#### While a thorough parameter tuning might result in significantly higher MAP on your validation split, it could have only marginally better or even worse MAP on the test set"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature weighting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# IDF"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![title](https://www.link-assistant.com/images/news/tf-idf-tool-for-seo/screen-03.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(10681, 16529)\n",
      "(10681,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ferra\\Anaconda3\\envs\\RecSysFramework\\lib\\site-packages\\ipykernel_launcher.py:6: RuntimeWarning: divide by zero encountered in true_divide\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "num_tot_items = ICM_all.shape[0]\n",
    "\n",
    "# let's count how many items have a certain feature\n",
    "items_per_feature = np.ediff1d(ICM_all.indptr)\n",
    "\n",
    "IDF = np.array(np.log(num_tot_items / items_per_feature))\n",
    "\n",
    "print(ICM_all.shape)\n",
    "print(IDF.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([8.17760945, 6.33178276, 5.94401723, ..., 9.27622174, 8.58307456,\n",
       "       8.58307456])"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "IDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEGCAYAAABvtY4XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAa2ElEQVR4nO3de3hV9Z3v8feXBHIBAiIBws2gpSigIkbrFcdLbfHS2tan6oy12jOjnnqO9fhMWz2dYzudS5/W80ydtqe29qLWqtMztrY91nrpxVqtNxBFLiIVkXIJBBESQhKy9/6eP9YKbEJCINlrr7XX/ryeZz9Ze2Xtvb4hyYdffuu3fj9zd0REJH2GxV2AiIhEQwEvIpJSCngRkZRSwIuIpJQCXkQkpSrjLiDf+PHjvbGxMe4yRERKxuLFi7e6e31fn0tUwDc2NrJo0aK4yxARKRlm9nZ/n1MXjYhISingRURSSgEvIpJSCngRkZRSwIuIpJQCXkQkpRTwIiIplahx8CJS3tq7Mtz73Fo6d2fjLqWoaqsquf6sowr+vgp4EUmM59e8w9ceWwWAWczFFNH4UVUKeBFJt0wuWIDoVzeewZzJY2KupvSpD15EEqNngTmjjJrvEVLAi0hi9CwhWk7dM1FSwItIYvSsEK2ALwwFvIgkhrpoCksBLyKJ4aiLppAU8CKSGHtb8FIIkQa8mX3GzJaZ2XIzuynKc4lI6VMffGFFFvBmNhf4O+Bk4HjgIjObGdX5RKT07R1Fo4QvhChb8McAz7v7LnfPAH8APhLh+UQkJRTvhRFlwC8DFpjZ4WZWC1wATOt9kJlda2aLzGxRS0tLhOWISNLl1IIvqMgC3t1XAl8FngQeA14FMn0cd5e7N7l7U319nwuDi0iZ0EXWwor0Iqu7/8Dd57v7AmAbsDrK84lIadsT8Er4goh0sjEzm+DuW8xsOvBR4NQozycipW3PKBq14Qsi6tkkf2pmhwPdwA3u/m7E5xOREqa5aAor0oB39zOjfH8RSRcf+BA5BLqTVUSSI0z4YcPUhC8EBbyIJMaeuWhiriMtFPAikhg5jaIpKAW8iCSGpgsuLAW8iCSGpgsuLAW8iCSG7mQtLAW8iCTGnmGSSviCUMCLSHL03OikhC8IBbyIJEZPC17D4Asj6qkKRCQhcjlP/J2i2ZymCy4kBbxIGXi3fTdn3f57Wjv3m7E7kSrUhC8IBbxIGdi6s4vWzgwXHtvArEmj4y7ngBrGVDOmZnjcZaSCAl6kDPTcIXrBsQ1ceFxDvMVI0egiq0gZ6FkKTz0f5UUBL1IGtNZpeVLAi5SBnjtE1YIvLwp4kTKwN+CV8OVEAS9SBvb0wes3vqzo2y1SBtQHX54U8CJlIKcumrKkgBcpA65hkmVJAS9SBtSCL08KeJEysLcPPuZCpKgU8CJlYO+drEr4cqKAFykDGgdfnhTwImVAd7KWJ80mKRKD1Zvb+MQPXqSjO1uU82WyOQCGKeHLigJeJAZvtuykubWTDx0/mXEjRxTlnKOqKpkzua4o55JkUMCLxKBn2OINZ78n8QtwSOlSH7xIDHrWHlWPiURJAS8Sg72TfynhJTqRBryZ/Q8zW25my8zsQTOrjvJ8IqWipwVfoWGLEqHIAt7MpgA3Ak3uPheoAC6P6nwipaSnD75CLXiJUNRdNJVAjZlVArXAxojPJ1IScjlNHSDRiyzg3X0D8L+BdcAmYIe7P9H7ODO71swWmdmilpaWqMoRSZRs2AevFrxEKcoumsOADwMzgMnASDO7svdx7n6Xuze5e1N9fX1U5YgkSs9FVvXBS5Si7KI5D3jL3VvcvRv4GXBahOcTKRl7u2gU8BKdKG90WgecYma1QAdwLrAowvOJROKBF9axYfuugr7n0vU7AHXRSLQiC3h3f8HMHgJeBjLAEuCuqM4nEoW2zm7+58OvYVb47pTp42oZVaWbySU6kf50ufsXgS9GeQ6RKGWyQVfKFy+azdWnz4i5GpFDoztZRQ4gk9NoFyldCniRA9gz2mWYflWk9OinVuQA9rbgYy5EZBD0YytyALmcWvBSuvRTK3IAWbXgpYTpx1bkADJqwUsJ0yBcSa0/rm7hwRfXDek92jozgKYUkNKkgJfUemjxep5csZnGw0cO6X2OnTKG2VrLVEqQAl5SK5N1po+r5cmbz4q7FJFYqGNRUiuTyzFcV0eljOmnX1Irm3PdgSplTQEvqZXJOZUKeCljCnhJLbXgpdzpIqsk2s6uDA8v2cDuTO6QX/uXbbuYUFcdQVUipUEBL4n2xPJm/tfPlw369SceMa6A1YiUFgW8JFpHdxaA39y8gPrRh94aH60FNaSM6adfEq1nwY3DakcwpmZ4zNWIlBZdZJVE684Gfe+VGs8ucsj0WyOJ1jPZ1/AKjYYROVTqopGi6s7m+OPqFrq6D25UzIqNrQBUajZHkUOmgJei+u3KLVz/48WH9Jq66kq14EUGQQEvRdXW2Q3APdecxKQxBzcqZvyoKkzT9YocMgW8FFV3OCrm6El1Bx3wIjI46tiUouoZFaMuF5HoqQUvBdOVyQ54TM+NS8Mr1bYQiZoCXgriK4+u5LtPrzno40doXLtI5BTwUhCrt+xkYl0VV53aOOCx08bVUj28IvqiRMqcAl4KYncmx5SxNdxw9nviLkVEQvo7WQpidzbHCPWriySKWvByUBat3cb6dzv6/fyW1k6mjastYkUiMpDIAt7MZgE/ydt1JHCbu98R1TklGplsjiu+9/yeMez90dzrIskSWcC7+ypgHoCZVQAbgIejOp9EpyuTozvrXHfWkVx+0vR+j5t6WE0RqxKRgRSri+Zc4E13f7tI55MC6lkub1JdNTPGj4y5GhE5WMUK+MuBB4t0LhmEd3Z20bKzq5/P7QagqlJDG0VKSeQBb2YjgA8Bt/bz+WuBawGmT+//z3+J1vu//jTb2ncf8Ji6Gl2TFyklxfiNXQi87O6b+/qku98F3AXQ1NR04Kt4EolMNse29t1cfPxkLpg7qc9jRlQO48yZ9UWuTESGohgBfwXqnkm0rrCPfe7kOhYe2xBzNSJSKAcMeDO7x92vDrc/6e73Hsqbm1kt8H7gukFXKAXh7jz6WjOt4Xzs+XbtDiYAq9KNSiKpMlAL/vi87c8AhxTw7r4LOPxQi5LCe2PzTm544OUDHjN5rIY5iqTJQAGvPvGU6FlJ6d8vn8f7Zuz/f25lhTF+VFWxyxKRCA0U8FPN7BuA5W3v4e43RlaZFFRnuMh1w5garaQkUiYGCvjP5m0virIQKYwN2zv46Lefpb1r38U3Mrkg4Gs0Ta9I2ThgwB/qRVWJ31st7Wxu7eKi4xqYWLdvS72uejjHNIyOqTIRKbYBh0ma2ScJLrDOCnetBL7h7j+KsjAZnM5wSbzrFhzFsVPHxFyNiMRpoGGSVwE3ATcDLxP0xc8HbjczFPLxu+0Xy1i+sXXP83fDu1Grh2vIo0i5G6gF/2ngI+6+Nm/f78zsY8B/AAr4GLk797+wjsljqzliXDAJWM3YGuZMGcP0wzU3u0i5Gyjg63qFOwDuvtbM6qIpSQ7W7myObM65/KTpWipPRPYzUMD3v4TPgT8nEfjXR1fy1Kote55nc8FtClrAWkT6MlDAH2NmS/vYbwQrNEkR/WrpJgCOy7t4OnvyGM45ekJcJYlIgg0Y8EWpQvbTlcmyo2PfeWPad2e4+LjJ/NMlc2OqSkRKyUDj4LUCU0wuvfM5XtuwY7/9mpNdRA7WQMMk2+h7PhoD3N11oTUia99p57SjDueCvOl7h5nx/tkTY6xKRErJQC143fZYBE+u2Mzit9/dZ197V4YTpo/lylOOiKkqESl1+ns/Af7x/y1n4/YOKiv23pxUVVnBnMm6E1VEBk8BH5PO7iwtbcEi160d3Vx5yhF8+cO6eCoihaOAj8k1d7/Ec2ve2fN8bO2IGKsRkTRSwMdk3bZdnNR4GJedNJ1hhsayi0jBKeCL6K2t7Vx/32I6M1k27ejg3GMmcOmJU+MuS0RSSlMOFtFrG3awanMbMyeM5sPzpvCx+Qp3EYmOWvAR29zayW2/WEZnd47mHZ0A/PMlc7VsnohETi34iL3w1jYeX76Zza2dVI+oYOHcSdSP1uLWIhI9teAjsHF7B//66Ep2Z3Js2B5MunnPNSer1S4iRaUWfASeWb2VR5ZuYs3WdrI555yjJzB+lIZBikhxqQVfYP/0yAp+u3IzAD/79GnUVQ+PuSIRKVdqwRdQLufc/exbdGedjzdNZXSV/v8UkfgogQpg044O/vp7L9DWmSHncM3pjfztmVoPRUTipYAfoh27uvnj6q28tbWdD8yZyMS6ahbmTfErIhIXBfwQdGWynPHV39HWlQHgHy6czbRxtTFXJSISUMAP0hPLm1m+sZW2rgwfb5rKwmMbFO4ikigK+EHYnclx/Y8Xk3OoGGZceuI0Tp4xLu6yRET2EWnAm9lY4PvAXIKl/z7l7s9Fec4odXZnufruF9nS1kXO4R8uPIYrTzmC6uEVcZcmIrKfqIdJ/jvwmLsfDRwPrIz4fJHZtKODny/ZwPNrtjG2ZjiXzJvMB+dOUriLSGJF1oI3szpgAXA1gLvvBnZHdb6offr+l1mybjsAt108h3nTxsZckYjIgUXZgj8SaAHuNrMlZvZ9MxvZ+yAzu9bMFpnZopaWlgjLGZzubI7r71vM8o2tnHfMBH79mTM5fqrWShWR5Isy4CuB+cCd7n4C0A7c0vsgd7/L3Zvcvam+vj7Ccg7dn7e08eCL63hseTNH1Y/imtNncExDHWYWd2kiIgOK8iLremC9u78QPn+IPgI+ya67bzFvtrQD8OUPz+GkRo2UEZHSEVnAu3uzmf3FzGa5+yrgXGBFVOcrpO5sjou/+QxvtrTzsflTuem8mRrjLiIlJ+px8P8duN/MRgBrgGsiPt+QPbG8mV8va+b15jbOnlXPp88+SuEuIiUp0oB391eApijPUUgtbV1ce99iRlQOY9q4Gj6/8GiOqh8Vd1kiIoOiO1nzfPahVwG48n1HcNvFs2OuRkRkaDQffOiO37zBU6tamD6uls8vnBV3OSIiQ6aAJ1hi747frAbge1c1UVWpu1NFpPSVfcBv3N7BlT8IRnL+28ePZ9ak0TFXJCJSGGUd8Jlsjk/d8xIAFx7bwEfnT425IhGRwinbi6xdmSz/8quVvN7cRuUw45tXnBB3SSIiBVW2LfjvPLWGHz33NgBPf+5shg3T9AMiki5lGfBdmSxf/80bADx7yzlMHlsTc0UiIoVXlgH/+YeWAnDdgiOZonAXkZQqu4B/ed27/PyVjQDcfP57Y65GRCQ6ZRXwO3Z189Fv/wmAO/9mvsa7i0iqlVXAX/ytZwA475gJfHDupJirERGJVtkE/Atr3mHdtl1MGVvDt/56vhbtEJHUK5uAv/n/BhOJffcTJ2qhbBEpC2UR8EvWvcuG7R00jKlm7hStpyoi5aEsAv7r4URid155YsyViIgUT+oDviuT5ek3Wpgwuop508bGXY6ISNGkPuDvC6cj0ERiIlJuUh/wX/n16wDceO57Yq5ERKS4Uh3wT7/RQjbnzJs2ltoRZTtxpoiUqdQGfCab46ofvgjA1y49LuZqRESKL7UB/9vXtwDBXavvnahVmkSk/KQ24B98cR0A/3zJsTFXIiISj1QGfDbnPLWqhYphxqQx1XGXIyISi1QG/N3PvgXA3515ZMyViIjEJ5UB//0/BgF//VkKeBEpX6kL+PauDM2tnfzVrHrG1o6IuxwRkdikLuBfWrsNgLNnTYi5EhGReKUu4HumJvjAHC3oISLlLXUB//a2XQAaPSMiZS91Af/nLTs5c+b4uMsQEYldpBO0mNlaoA3IAhl3b4ryfM+s3grAyY3jojyNiEhJKMYMXGe7+9YinIcnVzQDcMkJU4pxOhGRREtVF83za7ZRO6KCaeNq4y5FRCR2UQe8A0+Y2WIzu7avA8zsWjNbZGaLWlpahnSy9t0ZZmpiMRERIPqAP93d5wMLgRvMbEHvA9z9Lndvcvem+vr6QZ+oszvL+nc7OO2ow4dQrohIekQa8O6+Mfy4BXgYODmqc73e3AbAhNFVUZ1CRKSkRBbwZjbSzEb3bAPnA8uiOl/zjg4ATp6hETQiIhDtKJqJwMNm1nOeB9z9sahO1ryjE4BJdbrBSUQEIgx4d18DHB/V+/e2qbWTERXDGDdSE4yJiECKhklu3tHJxDFVhH8xiIiUvdQE/KYdnTTU1cRdhohIYqQm4JtbO5moCcZERPZIRcC7O807OmlQwIuI7JGKgN++q5uuTI6JGkEjIrJHKgJ+xaZWAA7XCBoRkT1SEfA7OroBaBw/MuZKRESSIxUB39YZBPz4UWrBi4j0SEXAv7F5JwB1NcNjrkREJDlSEfA5dwDqqhXwIiI9UhHwbZ0ZJmuIpIjIPlIR8K0d3YxW611EZB+pCPi2zgx1NcVYXlZEpHSkIuB3dmUYVaWAFxHJl4qA787mGF6Rii9FRKRgUpGKCngRkf2lIhUzOaeyQvPAi4jkS0fAZ53KYan4UkRECiYVqRh00agFLyKSLxUBn8k5FcMU8CIi+dIR8LrIKiKyn1SkYibnVKoFLyKyj1QE/PmzJzJnSl3cZYiIJEoqbv+84/IT4i5BRCRxUtGCFxGR/SngRURSSgEvIpJSCngRkZRSwIuIpJQCXkQkpRTwIiIppYAXEUkpc/e4a9jDzFqAtwf58vHA1gKWU2iqb2hU39CovqFJcn1HuHt9X59IVMAPhZktcvemuOvoj+obGtU3NKpvaJJeX3/URSMiklIKeBGRlEpTwN8VdwEDUH1Do/qGRvUNTdLr61Nq+uBFRGRfaWrBi4hIHgW8iEhKlXzAm9kHzWyVmf3ZzG4p4nl/aGZbzGxZ3r5xZvakma0OPx6W97lbwxpXmdkH8vafaGavhZ/7hpkVZO1BM5tmZr83s5VmttzMPpOkGs2s2sxeNLNXw/r+MUn15b13hZktMbNHklafma0N3/cVM1uUwPrGmtlDZvZ6+HN4alLqM7NZ4b9bz6PVzG5KSn0F4+4l+wAqgDeBI4ERwKvA7CKdewEwH1iWt+9rwC3h9i3AV8Pt2WFtVcCMsOaK8HMvAqcCBvwaWFig+hqA+eH2aOCNsI5E1Bi+16hwezjwAnBKUurLq/Nm4AHgkQR+j9cC43vtS1J99wJ/G26PAMYmqb68OiuAZuCIJNY3pK8t7gKG+I05FXg87/mtwK1FPH8j+wb8KqAh3G4AVvVVF/B4WHsD8Hre/iuA70ZU6y+A9yexRqAWeBl4X5LqA6YCvwXOYW/AJ6m+tewf8ImoD6gD3iIcyJG0+nrVdD7wbFLrG8qj1LtopgB/yXu+PtwXl4nuvgkg/Dgh3N9fnVPC7d77C8rMGoETCFrJiakx7P54BdgCPOnuiaoPuAP4HJDL25ek+hx4wswWm9m1CavvSKAFuDvs4vq+mY1MUH35LgceDLeTWN+glXrA99XXlcRxn/3VGXn9ZjYK+Clwk7u3HujQfmqJrEZ3z7r7PIKW8slmNjcp9ZnZRcAWd198sC/pp44ov8enu/t8YCFwg5ktOMCxxa6vkqAL8053PwFoJ+jy6E8svyNmNgL4EPCfAx3aTx2JzqBSD/j1wLS851OBjTHVArDZzBoAwo9bwv391bk+3O69vyDMbDhBuN/v7j9LYo0A7r4deAr4YILqOx34kJmtBf4DOMfMfpyg+nD3jeHHLcDDwMkJqm89sD78qwzgIYLAT0p9PRYCL7v75vB50uobklIP+JeAmWY2I/yf+HLglzHW80vgk+H2Jwn6vXv2X25mVWY2A5gJvBj+CdhmZqeEV96vynvNkITv9wNgpbv/W9JqNLN6MxsbbtcA5wGvJ6U+d7/V3ae6eyPBz9Xv3P3KpNRnZiPNbHTPNkE/8rKk1OfuzcBfzGxWuOtcYEVS6stzBXu7Z3rqSFJ9QxP3RYChPoALCEaIvAl8oYjnfRDYBHQT/C/+X4DDCS7KrQ4/jss7/gthjavIu8oONBH8Yr4JfIteF6WGUN8ZBH8qLgVeCR8XJKVG4DhgSVjfMuC2cH8i6utV61+x9yJrIuoj6ON+NXws7/nZT0p94fvOAxaF3+OfA4clrL5a4B1gTN6+xNRXiIemKhARSalS76IREZF+KOBFRFJKAS8iklIKeBGRlFLAi4iklAJeEsvMvmDBTJNLwxn/3neIr7/azCYf4msaLW+G0F6fuz2s5/ZDec/wtfPM7IJDfZ3IUFTGXYBIX8zsVOAighkxu8xsPMGMhAf7+grgaoLxyYW6s/A6oN7duwbx2nkE46UfPdgXhDfOmLvnBjxYpA9qwUtSNQBbe8LU3bd6eGu+mZ0bTmD1mgXz8leF+9ea2W1m9gzBHYpNwP1h678mnLf7D+HkXI/n3ZJ+ogXz0j8H3NBXMWb2S2Ak8IKZXRbeiftTM3spfJweHneymf0prO9PFsw7PgL4MnBZWMtlZvYlM/v7vPdfFv710GjB3OnfJphhc5qZfTY8x1IL580XOShx32mlhx59PYBRBHffvgF8Gzgr3F9NMKvfe8PnPyKYSA2C6XM/l/ceTwFN4fZw4E8ELXCAy4AfhttL897/dvKmgO5V08687QeAM8Lt6QRTQkAwTW5luH0e8NNw+2rgW3mv/xLw93nPlxFMP91IMHvlKeH+8wkWfDaCBtkjwIK4vz96lMZDXTSSSO6+08xOBM4EzgZ+YsGKXUuAt9z9jfDQewla3XeEz3/Sz1vOAuYCTwY9H1QAm8xsDDDW3f8QHncfwQRUAzkPmG17F++pC+eGGQPca2YzCaaKGH4wX28vb7v78+H2+eFjSfh8FME8KE8P4n2lzCjgJbHcPUvQCn/KzF4jmPzplQFe1t7PfgOWu/up++wMJjwbzHwdw4BT3b2j1/t9E/i9u3/Egnn4n+rn9Rn27SKtztvO/xoM+Iq7f3cQNUqZUx+8JFLYdz0zb9c84G2CGScbzew94f5PAH/o/fpQG8FyhRBMEFUfXrzFzIab2RwPpireYWZnhMf9zUGW+ATw3/LqnRdujgE2hNtX91MLBN1J88PXzidYBq4vjwOfsmBef8xsiplN6OdYkX0o4CWpRhF0dawws6UEa2J+yd07gWuA/wxb9TngO/28xz3AdyxYNaoCuBT4qpm9SvCXwGnhcdcA/ye8yNrR1xv14UagKbzwuQK4Ptz/NeArZvZseM4evyfo0nnFzC4jmKd/XFjbfyW41rAfd3+CoL//ufDrfYh9/6MQ6ZdmkxQRSSm14EVEUkoBLyKSUgp4EZGUUsCLiKSUAl5EJKUU8CIiKaWAFxFJqf8PTxn1/PXQmpkAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pyplot.plot(np.sort(IDF))\n",
    "pyplot.ylabel('IDF')\n",
    "pyplot.xlabel('Sorted feature')\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Highest ranked features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "feature_index_to_original_ID_dict = {value:key for key,value in feature_original_ID_to_index_dict.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dance\n",
      "Nicholas Meyer\n",
      "John Woo\n",
      "solidarity\n",
      "author:Laurence Stern\n",
      "utah\n",
      "transexual\n",
      "Barry Sonnenfeld\n",
      "Peter Cattaneo\n",
      "Bonneville Salt Flats\n",
      "legal system\n",
      "Juliette Binoche and Daniel Auteuil also together in Hidden (a.k.a. Cache) (2005)\n",
      "score:George Fenton\n",
      "tear jerker\n",
      "R:brief language\n",
      "RRobert Carlyle\n",
      "great war depiction\n",
      "champagne\n",
      "based upon a true story\n",
      "colonial power\n"
     ]
    }
   ],
   "source": [
    "sorted_features = np.argsort(-IDF)\n",
    "\n",
    "highest_ranked_features = sorted_features[:20]\n",
    "\n",
    "for feature_index in highest_ranked_features:\n",
    "    print(feature_index_to_original_ID_dict[feature_index])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Lowest ranked features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rats\n",
      "melancholy\n",
      "comic book\n",
      "jfd\n",
      "willem defoe\n",
      "inspirational\n",
      "crapflix\n",
      "sweet story\n",
      "Seen 2008\n",
      "Nostalgic\n",
      "Michael Crichton\n",
      "bond\n",
      "gratuitous sex\n",
      "los angeles\n",
      "biography\n",
      "Fate\n",
      "goth\n",
      "moon\n",
      "Shakespeare\n",
      "creepy\n"
     ]
    }
   ],
   "source": [
    "lowest_ranked_features = sorted_features[-20:]\n",
    "\n",
    "for feature_index in lowest_ranked_features:\n",
    "    print(feature_index_to_original_ID_dict[feature_index])    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<10681x10681 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 10681 stored elements (1 diagonals) in DIAgonal format>"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from scipy.sparse import diags\n",
    "diags(IDF)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "ICM_idf = ICM_all.copy()\n",
    "\n",
    "ICM_idf = diags(IDF)*ICM_idf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 13567.67 column/sec, elapsed time 0.01 min\n",
      "EvaluatorHoldout: Processed 51000 ( 73.06% ) in 30.14 sec. Users per second: 1692\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 42.06 sec. Users per second: 1660\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'ROC_AUC': 0.29724839357456884,\n",
       " 'PRECISION': 0.10787777204746536,\n",
       " 'PRECISION_RECALL_MIN_DEN': 0.11566533118128865,\n",
       " 'RECALL': 0.04996879018331518,\n",
       " 'MAP': 0.06091772193127482,\n",
       " 'MRR': 0.26299144407062347,\n",
       " 'NDCG': 0.07400334525182471,\n",
       " 'F1': 0.06830078122324963,\n",
       " 'HIT_RATE': 1.0787777204744713,\n",
       " 'ARHR': 0.38060994821781524,\n",
       " 'NOVELTY': 0.011202205243061985,\n",
       " 'AVERAGE_POPULARITY': 0.2770624137117382,\n",
       " 'DIVERSITY_MEAN_INTER_LIST': 0.9504522710845807,\n",
       " 'DIVERSITY_HERFINDAHL': 0.9950438655070153,\n",
       " 'COVERAGE_ITEM': 0.4643759947570452,\n",
       " 'COVERAGE_ITEM_CORRECT': 0.10504634397528322,\n",
       " 'COVERAGE_USER': 0.9753657411935669,\n",
       " 'COVERAGE_USER_CORRECT': 0.4880601394497464,\n",
       " 'DIVERSITY_GINI': 0.05328025084836199,\n",
       " 'SHANNON_ENTROPY': 9.085541126327955}"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommender_idf = ItemKNNCBFRecommender(URM_train, ICM_idf)\n",
    "recommender_idf.fit(shrink=0.0, topK=50)\n",
    "\n",
    "result_dict, _ = evaluator_test.evaluateRecommender(recommender_idf)\n",
    "result_dict[10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### There is  a small gain over the non-weighted ICM. Try other feature weighting methods like BM25..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BM25"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "from Base.IR_feature_weighting import okapi_BM_25\n",
    "\n",
    "ICM_BM25 = ICM_all.copy().astype(np.float32)\n",
    "ICM_BM25 = okapi_BM_25(ICM_BM25)\n",
    "ICM_BM25 = ICM_BM25.tocsr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "Similarity column 10681 ( 100 % ), 11635.44 column/sec, elapsed time 0.02 min\n",
      "EvaluatorHoldout: Processed 51000 ( 73.06% ) in 30.18 sec. Users per second: 1690\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 40.42 sec. Users per second: 1727\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'ROC_AUC': 0.2272203729399166,\n",
       " 'PRECISION': 0.06837287261474506,\n",
       " 'PRECISION_RECALL_MIN_DEN': 0.07357764438841391,\n",
       " 'RECALL': 0.03315653785806955,\n",
       " 'MAP': 0.0374806297584825,\n",
       " 'MRR': 0.18835191484002084,\n",
       " 'NDCG': 0.04846830394310805,\n",
       " 'F1': 0.044657163451624154,\n",
       " 'HIT_RATE': 0.6837287261474987,\n",
       " 'ARHR': 0.25374253692653387,\n",
       " 'NOVELTY': 0.01275358653658701,\n",
       " 'AVERAGE_POPULARITY': 0.1570733977111268,\n",
       " 'DIVERSITY_MEAN_INTER_LIST': 0.9732725532941416,\n",
       " 'DIVERSITY_HERFINDAHL': 0.9973258610360308,\n",
       " 'COVERAGE_ITEM': 0.49499110570171334,\n",
       " 'COVERAGE_ITEM_CORRECT': 0.1229285647411291,\n",
       " 'COVERAGE_USER': 0.9753657411935669,\n",
       " 'COVERAGE_USER_CORRECT': 0.364413766121257,\n",
       " 'DIVERSITY_GINI': 0.08374047863613968,\n",
       " 'SHANNON_ENTROPY': 9.92718546710525}"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommender_bm25 = ItemKNNCBFRecommender(URM_train, ICM_BM25)\n",
    "recommender_bm25.fit(shrink=0.0, topK=50)\n",
    "\n",
    "result_dict, _ = evaluator_test.evaluateRecommender(recommender_bm25)\n",
    "result_dict[10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Another small gain over TF-IDF"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Unnormalized similarity matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ItemKNNCBFRecommender: URM Detected 1689 (2.36 %) cold users.\n",
      "ItemKNNCBFRecommender: URM Detected 56 (0.52 %) cold items.\n",
      "EvaluatorHoldout: Processed 31000 ( 44.41% ) in 30.59 sec. Users per second: 1014\n",
      "EvaluatorHoldout: Processed 61000 ( 87.39% ) in 1.03 min. Users per second: 991\n",
      "EvaluatorHoldout: Processed 69804 ( 100.00% ) in 1.19 min. Users per second: 978\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'ROC_AUC': 0.36459233927559553,\n",
       " 'PRECISION': 0.12377943957369554,\n",
       " 'PRECISION_RECALL_MIN_DEN': 0.1342599926051701,\n",
       " 'RECALL': 0.05990707378296589,\n",
       " 'MAP': 0.07297345818751802,\n",
       " 'MRR': 0.32535928263537534,\n",
       " 'NDCG': 0.09473529870409346,\n",
       " 'F1': 0.08073825218683787,\n",
       " 'HIT_RATE': 1.237794395736634,\n",
       " 'ARHR': 0.4596577275782461,\n",
       " 'NOVELTY': 0.008518366946043004,\n",
       " 'AVERAGE_POPULARITY': 0.5836124988637034,\n",
       " 'DIVERSITY_MEAN_INTER_LIST': 0.5996835445570904,\n",
       " 'DIVERSITY_HERFINDAHL': 0.9599674953594616,\n",
       " 'COVERAGE_ITEM': 0.04550135755079113,\n",
       " 'COVERAGE_ITEM_CORRECT': 0.017788596573354555,\n",
       " 'COVERAGE_USER': 0.9753657411935669,\n",
       " 'COVERAGE_USER_CORRECT': 0.5719675269328042,\n",
       " 'DIVERSITY_GINI': 0.002440043398161816,\n",
       " 'SHANNON_ENTROPY': 5.047504722305096}"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recommender_dot = ItemKNNCBFRecommender(URM_train, ICM_all)\n",
    "recommender_dot.W_sparse = ICM_all * ICM_all.T\n",
    "\n",
    "result_dict, _ = evaluator_test.evaluateRecommender(recommender_dot)\n",
    "result_dict[10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
